diff --git a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt index 7f4c11f820..dd8c14a54f 100644 --- a/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt +++ b/epochX/cudacpp/ee_mumu.mad/CODEGEN_mad_ee_mumu_log.txt @@ -14,7 +14,7 @@ Running MG5 in debug mode * * * * * * * * * * * * -* VERSION 3.5.3_lo_vect 2023-12-23 * +* VERSION 3.6.0_lo_vect 2024-06-17 * * * * WARNING: UNKNOWN DEVELOPMENT VERSION. * * WARNING: DO NOT USE FOR PRODUCTION * @@ -45,15 +45,10 @@ Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (inclu Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config -None does not seem to correspond to a valid lhapdf-config executable. -Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). -Note that you can still compile and run aMC@NLO with the built-in PDFs - MG5_aMC> set lhapdf /PATH/TO/lhapdf-config - Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +57,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005466938018798828  +DEBUG: model prefixing takes 0.005581378936767578  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -158,20 +153,20 @@ INFO: Process has 2 diagrams Total: 1 processes with 2 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_ee_mumu --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT -Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.5.3_lo_vect. +Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT +DEBUG: opt['output_options']['vector_size'] =  32 [export_v4.py at line 4334]  Output will be done with PLUGIN: CUDACPP_OUTPUT -DEBUG: cformat =  standalone_simd [export_cpp.py at line 3070]  DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 165]  INFO: initialize a new directory: CODEGEN_mad_ee_mumu INFO: remove old information in CODEGEN_mad_ee_mumu DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 @@ -181,30 +176,25 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2] [export_cpp.py at line 711]  -DEBUG: subproc_number =  0 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  -WARNING: vector code for lepton pdf not implemented. 
We removed the option to run dressed lepton  INFO: Generating Feynman diagrams for Process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group epem_mupmum -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses/P1_epem_mupmum [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  2 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2} [model_handling.py at line 1548]  Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s -Wrote files for 8 helas calls in 0.081 s +Wrote files for 8 helas calls in 0.073 s +DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines -ALOHA: aloha creates 3 routines in 0.203 s +ALOHA: aloha creates 3 routines in 0.206 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 7 routines in 0.258 s +ALOHA: aloha creates 7 routines in 0.262 s FFV1 FFV1 FFV2 @@ -213,38 +203,40 @@ ALOHA: aloha creates 7 routines in 0.258 s FFV4 FFV2_4 FFV2_4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. 
If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common -patching file Source/genps.inc +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file SubProcesses/makefile -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses/P1_epem_mupmum; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/SubProcesses/P1_epem_mupmum; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -Hunk #2 succeeded at 242 (offset 9 lines). +Hunk #2 succeeded at 236 (offset 22 lines). DEBUG: p.returncode =  0 [output.py at line 258]  -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/README +/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/README Run "open index.html" to see more information about this process. quit -real 0m2.059s -user 0m1.802s -sys 0m0.254s +real 0m2.540s +user 0m1.832s +sys 0m0.262s Code generation completed in 2 seconds ************************************************************ * * @@ -258,7 +250,7 @@ Code generation completed in 2 seconds * * * * * * * * * * * * -* VERSION 3.5.3_lo_vect * +* VERSION 3.6.0_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * @@ -266,9 +258,9 @@ Code generation completed in 2 seconds * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt @@ -288,7 +280,7 @@ launch in debug mode * * * * * * * * * * * * -* VERSION 3.5.3_lo_vect * +* VERSION 3.6.0_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * @@ -296,9 +288,9 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_ee_mumu/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt diff --git a/epochX/cudacpp/ee_mumu.mad/Cards/me5_configuration.txt b/epochX/cudacpp/ee_mumu.mad/Cards/me5_configuration.txt index cdeedc7863..4f5079f78a 100644 --- a/epochX/cudacpp/ee_mumu.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/ee_mumu.mad/Cards/me5_configuration.txt @@ -116,6 +116,7 @@ # cluster_type = condor # cluster_queue = madgraph # cluster_size = 150 +# cluster_walltime = # time in minute for slurm and second for condor (not supported for other scheduller) #! Path to a node directory to avoid direct writing on the central disk #! 
Note that condor clusters avoid direct writing by default (therefore this @@ -234,7 +235,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/ee_mumu.mad/Cards/param_card.dat b/epochX/cudacpp/ee_mumu.mad/Cards/param_card.dat index caf4a67ea8..bb150ef10d 100644 --- a/epochX/cudacpp/ee_mumu.mad/Cards/param_card.dat +++ b/epochX/cudacpp/ee_mumu.mad/Cards/param_card.dat @@ -1,5 +1,5 @@ ###################################################################### -## PARAM_CARD AUTOMATICALY GENERATED BY MG5 FOLLOWING UFO MODEL #### +## PARAM_CARD AUTOMATICALLY GENERATED BY MG5 FOLLOWING UFO MODEL #### ###################################################################### ## ## ## Width set on Auto will be computed following the information ## diff --git a/epochX/cudacpp/ee_mumu.mad/Cards/param_card_default.dat b/epochX/cudacpp/ee_mumu.mad/Cards/param_card_default.dat index caf4a67ea8..bb150ef10d 100644 --- a/epochX/cudacpp/ee_mumu.mad/Cards/param_card_default.dat +++ b/epochX/cudacpp/ee_mumu.mad/Cards/param_card_default.dat @@ -1,5 +1,5 @@ ###################################################################### -## PARAM_CARD AUTOMATICALY GENERATED BY MG5 FOLLOWING UFO MODEL #### +## PARAM_CARD AUTOMATICALLY GENERATED BY MG5 FOLLOWING UFO MODEL #### ###################################################################### ## ## ## Width set on Auto will be computed following the information ## diff --git a/epochX/cudacpp/ee_mumu.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/ee_mumu.mad/Cards/proc_card_mg5.dat index 9b246807bc..694519b041 100644 --- a/epochX/cudacpp/ee_mumu.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/ee_mumu.mad/Cards/proc_card_mg5.dat @@ -8,7 +8,7 @@ #* * * * #* * #* * -#* VERSION 3.5.3_lo_vect 2023-12-23 * +#* VERSION 3.6.0_lo_vect 2024-06-17 * #* * #* WARNING: UNKNOWN DEVELOPMENT VERSION. * #* WARNING: DO NOT USE FOR PRODUCTION * diff --git a/epochX/cudacpp/ee_mumu.mad/Cards/reweight_card_default.dat b/epochX/cudacpp/ee_mumu.mad/Cards/reweight_card_default.dat index ace534ae02..5cc65fe095 100644 --- a/epochX/cudacpp/ee_mumu.mad/Cards/reweight_card_default.dat +++ b/epochX/cudacpp/ee_mumu.mad/Cards/reweight_card_default.dat @@ -19,8 +19,16 @@ change mode NLO # Define type of Reweighting. For LO sample this command # has no effect since only "LO" mode is allowed. + +#uncomment if you do not want to overwrite the reweight file of Sudakov in rw_me +#change rwgt_dir /PATH_MG5_BRANCH/NAME_FOLDER + +#uncomment if you want to use Sudakov Reweight +#change include_sudakov True + launch + # SPECIFY A PATH OR USE THE SET COMMAND LIKE THIS: # set sminputs 1 130 # modify 1/alpha_EW diff --git a/epochX/cudacpp/ee_mumu.mad/Cards/run_card.dat b/epochX/cudacpp/ee_mumu.mad/Cards/run_card.dat index 69a8fac41b..74f70b567b 100644 --- a/epochX/cudacpp/ee_mumu.mad/Cards/run_card.dat +++ b/epochX/cudacpp/ee_mumu.mad/Cards/run_card.dat @@ -145,7 +145,7 @@ # Maximum and minimum absolute rapidity (for max, -1 means no cut) * #********************************************************************* 2.5 = etal ! max rap for the charged leptons - 0.0 = etalmin ! main rap for the charged leptons + 0.0 = etalmin ! min rap for the charged leptons {} = eta_min_pdg ! rap cut for other particles (use pdg code). 
Applied on particle and anti-particle {} = eta_max_pdg ! rap cut for other particles (syntax e.g. {6: 2.5, 23: 5}) #********************************************************************* @@ -199,6 +199,7 @@ systematics = systematics_program ! none, systematics [python], SysCalc [depreceted, C++] ['--mur=0.5,1,2', '--muf=0.5,1,2', '--pdf=errorset'] = systematics_arguments ! see: https://cp3.irmp.ucl.ac.be/projects/madgraph/wiki/Systematics#Systematicspythonmodule + #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ diff --git a/epochX/cudacpp/ee_mumu.mad/Cards/run_card_default.dat b/epochX/cudacpp/ee_mumu.mad/Cards/run_card_default.dat index 1f7d945f88..83399afb42 100644 --- a/epochX/cudacpp/ee_mumu.mad/Cards/run_card_default.dat +++ b/epochX/cudacpp/ee_mumu.mad/Cards/run_card_default.dat @@ -145,7 +145,7 @@ # Maximum and minimum absolute rapidity (for max, -1 means no cut) * #********************************************************************* 2.5 = etal ! max rap for the charged leptons - 0.0 = etalmin ! main rap for the charged leptons + 0.0 = etalmin ! min rap for the charged leptons {} = eta_min_pdg ! rap cut for other particles (use pdg code). Applied on particle and anti-particle {} = eta_max_pdg ! rap cut for other particles (syntax e.g. {6: 2.5, 23: 5}) #********************************************************************* @@ -199,6 +199,7 @@ systematics = systematics_program ! none, systematics [python], SysCalc [depreceted, C++] ['--mur=0.5,1,2', '--muf=0.5,1,2', '--pdf=errorset'] = systematics_arguments ! see: https://cp3.irmp.ucl.ac.be/projects/madgraph/wiki/Systematics#Systematicspythonmodule + #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ diff --git a/epochX/cudacpp/ee_mumu.mad/MGMEVersion.txt b/epochX/cudacpp/ee_mumu.mad/MGMEVersion.txt index 9d3a5c0ba0..a806d3938c 100644 --- a/epochX/cudacpp/ee_mumu.mad/MGMEVersion.txt +++ b/epochX/cudacpp/ee_mumu.mad/MGMEVersion.txt @@ -1 +1 @@ -3.5.3_lo_vect \ No newline at end of file +3.6.0_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/ee_mumu.mad/Source/DHELAS/aloha_functions.f b/epochX/cudacpp/ee_mumu.mad/Source/DHELAS/aloha_functions.f index 975725737f..47699fa614 100644 --- a/epochX/cudacpp/ee_mumu.mad/Source/DHELAS/aloha_functions.f +++ b/epochX/cudacpp/ee_mumu.mad/Source/DHELAS/aloha_functions.f @@ -351,7 +351,7 @@ subroutine oxxxxx(p,fmass,nhel,nsf , fo) fo(6) = ip * sqm(abs(im)) else - pp = min(p(0),dsqrt(p(1)**2+p(2)**2+p(3)**2)) +c pp = min(p(0),dsqrt(p(1)**2+p(2)**2+p(3)**2)) sf(1) = dble(1+nsf+(1-nsf)*nh)*rHalf sf(2) = dble(1+nsf-(1-nsf)*nh)*rHalf omega(1) = dsqrt(p(0)+pp) @@ -2054,7 +2054,7 @@ subroutine CombineAmp(nb, ihels, iwfcts, W1, Wall, Amp) enddo return end - + subroutine CombineAmpS(nb, ihels, iwfcts, W1, Wall, Amp) integer nb ! 
size of the vectors diff --git a/epochX/cudacpp/ee_mumu.mad/Source/MODEL/couplings.f b/epochX/cudacpp/ee_mumu.mad/Source/MODEL/couplings.f index f3b620ab58..04d6bb5333 100644 --- a/epochX/cudacpp/ee_mumu.mad/Source/MODEL/couplings.f +++ b/epochX/cudacpp/ee_mumu.mad/Source/MODEL/couplings.f @@ -10,18 +10,27 @@ SUBROUTINE COUP() PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + LOGICAL UPDATELOOP COMMON /TO_UPDATELOOP/UPDATELOOP INCLUDE 'input.inc' - INCLUDE '../vector.inc' + + INCLUDE 'coupl.inc' READLHA = .TRUE. INCLUDE 'intparam_definition.inc' CALL COUP1() + IF (UPDATELOOP) THEN + + CALL COUP2() + + ENDIF + C couplings needed to be evaluated points by points C - CALL COUP2(1) + CALL COUP3(1) RETURN END @@ -47,7 +56,11 @@ SUBROUTINE UPDATE_AS_PARAM(VECID) INCLUDE '../maxparticles.inc' INCLUDE '../cuts.inc' + + INCLUDE '../vector.inc' + + INCLUDE '../run.inc' DOUBLE PRECISION ALPHAS @@ -65,7 +78,7 @@ SUBROUTINE UPDATE_AS_PARAM(VECID) couplings needed to be evaluated points by points C ALL_G(VECID) = G - CALL COUP2(VECID) + CALL COUP3(VECID) RETURN END @@ -80,7 +93,9 @@ SUBROUTINE UPDATE_AS_PARAM2(MU_R2,AS2 ,VECID) INTEGER VECID INCLUDE 'model_functions.inc' INCLUDE 'input.inc' + INCLUDE '../vector.inc' + INCLUDE 'coupl.inc' DOUBLE PRECISION MODEL_SCALE COMMON /MODEL_SCALE/MODEL_SCALE diff --git a/epochX/cudacpp/ee_mumu.mad/Source/MODEL/couplings1.f b/epochX/cudacpp/ee_mumu.mad/Source/MODEL/couplings1.f index 5272ec5da5..e4dad93f44 100644 --- a/epochX/cudacpp/ee_mumu.mad/Source/MODEL/couplings1.f +++ b/epochX/cudacpp/ee_mumu.mad/Source/MODEL/couplings1.f @@ -7,12 +7,13 @@ SUBROUTINE COUP1( ) IMPLICIT NONE INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' GC_3 = -(MDL_EE*MDL_COMPLEXI) GC_50 = -(MDL_CW*MDL_EE*MDL_COMPLEXI)/(2.000000D+00*MDL_SW) diff --git a/epochX/cudacpp/ee_mumu.mad/Source/MODEL/couplings2.f b/epochX/cudacpp/ee_mumu.mad/Source/MODEL/couplings2.f index 28daa93647..30f3a04e3b 100644 --- a/epochX/cudacpp/ee_mumu.mad/Source/MODEL/couplings2.f +++ b/epochX/cudacpp/ee_mumu.mad/Source/MODEL/couplings2.f @@ -2,16 +2,17 @@ c written by the UFO converter ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc - SUBROUTINE COUP2( VECID) + SUBROUTINE COUP2( ) IMPLICIT NONE - INTEGER VECID + INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' END diff --git a/epochX/cudacpp/ee_mumu.mad/Source/MODEL/couplings3.f b/epochX/cudacpp/ee_mumu.mad/Source/MODEL/couplings3.f index 2cebce3012..d691cab086 100644 --- a/epochX/cudacpp/ee_mumu.mad/Source/MODEL/couplings3.f +++ b/epochX/cudacpp/ee_mumu.mad/Source/MODEL/couplings3.f @@ -7,11 +7,12 @@ SUBROUTINE COUP3( VECID) IMPLICIT NONE INTEGER VECID INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' END diff --git a/epochX/cudacpp/ee_mumu.mad/Source/MODEL/makefile b/epochX/cudacpp/ee_mumu.mad/Source/MODEL/makefile index 0b24445a53..5a5681d220 100644 --- a/epochX/cudacpp/ee_mumu.mad/Source/MODEL/makefile +++ b/epochX/cudacpp/ee_mumu.mad/Source/MODEL/makefile @@ -3,7 +3,7 @@ # Makefile for model library # # 
---------------------------------------------------------------------------- - +# template models/template_files/makefile_madevent # Check for ../make_opts ifeq ($(wildcard ../make_opts), ../make_opts) include ../make_opts diff --git a/epochX/cudacpp/ee_mumu.mad/Source/MODEL/makeinc.inc b/epochX/cudacpp/ee_mumu.mad/Source/MODEL/makeinc.inc index 6e2743eac1..4a9e1b2cc5 100644 --- a/epochX/cudacpp/ee_mumu.mad/Source/MODEL/makeinc.inc +++ b/epochX/cudacpp/ee_mumu.mad/Source/MODEL/makeinc.inc @@ -2,4 +2,4 @@ # written by the UFO converter ############################################################################# -MODEL = couplings.o lha_read.o printout.o rw_para.o model_functions.o couplings1.o couplings2.o \ No newline at end of file +MODEL = couplings.o lha_read.o printout.o rw_para.o model_functions.o couplings1.o couplings2.o couplings3.o \ No newline at end of file diff --git a/epochX/cudacpp/ee_mumu.mad/Source/MODEL/printout.f b/epochX/cudacpp/ee_mumu.mad/Source/MODEL/printout.f index 18b8f35b08..3e195b03a2 100644 --- a/epochX/cudacpp/ee_mumu.mad/Source/MODEL/printout.f +++ b/epochX/cudacpp/ee_mumu.mad/Source/MODEL/printout.f @@ -1,3 +1,7 @@ +ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc +c written by the UFO converter +ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc + c************************************************************************ c** ** c** MadGraph/MadEvent Interface to FeynRules ** @@ -9,7 +13,7 @@ subroutine printout implicit none - include '../vector.inc' ! defines VECSIZE_MEMMAX + include '../vector.inc' ! VECSIZE_MEMMAX (needed by coupl.inc) include 'coupl.inc' ! needs VECSIZE_MEMMAX (defined in vector.inc) include 'input.inc' diff --git a/epochX/cudacpp/ee_mumu.mad/Source/PDF/eepdf.inc b/epochX/cudacpp/ee_mumu.mad/Source/PDF/eepdf.inc index a0183e49ee..d50d8c62b3 100644 --- a/epochX/cudacpp/ee_mumu.mad/Source/PDF/eepdf.inc +++ b/epochX/cudacpp/ee_mumu.mad/Source/PDF/eepdf.inc @@ -4,6 +4,9 @@ integer n_ee parameter (n_ee = 4) ! arrays to store the components before combining them + ! note this common is very quickly overwritten do not use + ! use such common outside of auto_dsig.f double precision ee_components(n_ee) common / to_ee_components / ee_components + diff --git a/epochX/cudacpp/ee_mumu.mad/Source/PDF/pdfwrap_emela.f b/epochX/cudacpp/ee_mumu.mad/Source/PDF/pdfwrap_emela.f index bce10819d5..da1e4e2276 100644 --- a/epochX/cudacpp/ee_mumu.mad/Source/PDF/pdfwrap_emela.f +++ b/epochX/cudacpp/ee_mumu.mad/Source/PDF/pdfwrap_emela.f @@ -13,7 +13,7 @@ SUBROUTINE PDFWRAP DOUBLE PRECISION VALUE(20) REAL*8 ALPHASPDF EXTERNAL ALPHASPDF - ! PDFs with beamstrahlung use specific initialisation/evaluation +C PDFs with beamstrahlung use specific initialisation/evaluation LOGICAL HAS_BSTRAHL COMMON /TO_HAS_BS/ HAS_BSTRAHL diff --git a/epochX/cudacpp/ee_mumu.mad/Source/PDF/pdg2pdf.f b/epochX/cudacpp/ee_mumu.mad/Source/PDF/pdg2pdf.f index 46f321e66b..2e7343a69b 100644 --- a/epochX/cudacpp/ee_mumu.mad/Source/PDF/pdg2pdf.f +++ b/epochX/cudacpp/ee_mumu.mad/Source/PDF/pdg2pdf.f @@ -108,7 +108,7 @@ double precision function pdg2pdf(ih,ipdg,beamid,x,xmu) else if (abs(ipart).eq.10) then ipart = sign(1,ipart) * 15 endif - pdg2pdf = 0d0 + pdg2pdf = 0d0 if (beamid.lt.0) then ih_local = ipart @@ -122,7 +122,7 @@ double precision function pdg2pdf(ih,ipdg,beamid,x,xmu) endif do i_ee = 1, n_ee ee_components(i_ee) = compute_eepdf(x,omx_ee(iabs(beamid)),xmu,i_ee,ipart,ih_local) - enddo + enddo pdg2pdf = ee_components(1) ! 
temporary to test pdf load c write(*,*), x, beamid ,omx_ee(iabs(beamid)),xmu,1,ipart,ih_local,pdg2pdf return diff --git a/epochX/cudacpp/ee_mumu.mad/Source/dsample.f b/epochX/cudacpp/ee_mumu.mad/Source/dsample.f index 09d482b45a..b0bc0547ad 100644 --- a/epochX/cudacpp/ee_mumu.mad/Source/dsample.f +++ b/epochX/cudacpp/ee_mumu.mad/Source/dsample.f @@ -204,6 +204,9 @@ subroutine sample_full(ndim,ncall,itmax,itmin,dsig,ninvar,nconfigs,VECSIZE_USED) if (VECSIZE_USED.le.1) then all_fx(1) = dsig(all_p, all_wgt,0) + ivec=0 + ilock=0 + iwarp=1 else c Here "i" is the position in the full grid of the event do i=(iwarp-1)*WARP_SIZE+1, iwarp*warp_size diff --git a/epochX/cudacpp/ee_mumu.mad/Source/eepdf.inc b/epochX/cudacpp/ee_mumu.mad/Source/eepdf.inc index a0183e49ee..d50d8c62b3 100644 --- a/epochX/cudacpp/ee_mumu.mad/Source/eepdf.inc +++ b/epochX/cudacpp/ee_mumu.mad/Source/eepdf.inc @@ -4,6 +4,9 @@ integer n_ee parameter (n_ee = 4) ! arrays to store the components before combining them + ! note this common is very quickly overwritten do not use + ! use such common outside of auto_dsig.f double precision ee_components(n_ee) common / to_ee_components / ee_components + diff --git a/epochX/cudacpp/ee_mumu.mad/Source/genps.inc b/epochX/cudacpp/ee_mumu.mad/Source/genps.inc index af7e0efbce..ef78e7e812 100644 --- a/epochX/cudacpp/ee_mumu.mad/Source/genps.inc +++ b/epochX/cudacpp/ee_mumu.mad/Source/genps.inc @@ -29,9 +29,8 @@ c parameter (max_host=99,maxplace=199,maxpoints=100,maxans=50) c************************************************************************* c Parameters for helicity sums in matrixN.f c************************************************************************* - REAL*8 LIMHEL -c PARAMETER(LIMHEL=1e-8) ! ME threshold for helicity filtering (Fortran default) - PARAMETER(LIMHEL=0) ! ME threshold for helicity filtering (force Fortran to mimic cudacpp, see #419) +c REAL*8 LIMHEL +c PARAMETER(LIMHEL=1e-8) -> pass in the run_card.dat INTEGER MAXTRIES PARAMETER(MAXTRIES=25) C To pass the helicity configuration chosen by the DiscreteSampler to diff --git a/epochX/cudacpp/ee_mumu.mad/Source/run.inc b/epochX/cudacpp/ee_mumu.mad/Source/run.inc index 5433a23583..81c8df6bf2 100644 --- a/epochX/cudacpp/ee_mumu.mad/Source/run.inc +++ b/epochX/cudacpp/ee_mumu.mad/Source/run.inc @@ -106,4 +106,7 @@ c double precision tmin_for_channel integer sde_strat ! 
1 means standard single diagram enhancement strategy, c 2 means approximation by the denominator of the propagator - common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat \ No newline at end of file + common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat +c + double precision limhel + common/to_limhel/limhel diff --git a/epochX/cudacpp/ee_mumu.mad/Source/run_card.inc b/epochX/cudacpp/ee_mumu.mad/Source/run_card.inc index 97eaca8ead..80d5ae41aa 100644 --- a/epochX/cudacpp/ee_mumu.mad/Source/run_card.inc +++ b/epochX/cudacpp/ee_mumu.mad/Source/run_card.inc @@ -88,6 +88,8 @@ DSQRT_SHAT = 0.000000000000000D+00 + LIMHEL = 0.000000000000000D+00 + PTJ = 2.000000000000000D+01 PTB = 0.000000000000000D+00 diff --git a/epochX/cudacpp/ee_mumu.mad/Source/setrun.f b/epochX/cudacpp/ee_mumu.mad/Source/setrun.f index 9e9ef7fdbd..dc6caef748 100644 --- a/epochX/cudacpp/ee_mumu.mad/Source/setrun.f +++ b/epochX/cudacpp/ee_mumu.mad/Source/setrun.f @@ -162,9 +162,21 @@ subroutine setrun C Fill common block for Les Houches init info do i=1,2 if(lpp(i).eq.1.or.lpp(i).eq.2) then - idbmup(i)=2212 + if (nb_proton(i).eq.1.and.nb_neutron(i).eq.0) then + idbmup(i)=2212 + elseif (nb_proton(i).eq.0.and.nb_neutron(i).eq.1) then + idbmup(i)=2112 + else + idbmup(i) = 1000000000 + (nb_proton(i)+nb_neutron(i))*10 + $ + nb_proton(i)*10000 + endif elseif(lpp(i).eq.-1.or.lpp(i).eq.-2) then - idbmup(i)=-2212 + if (nb_proton(i).eq.1.and.nb_neutron(i).eq.0) then + idbmup(i)=-2212 + else + idbmup(i) = -1*(1000000000 + (nb_proton(i)+nb_neutron(i))*10 + $ + nb_proton(i)*10000) + endif elseif(lpp(i).eq.3) then idbmup(i)=11 elseif(lpp(i).eq.-3) then diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/MGVersion.txt b/epochX/cudacpp/ee_mumu.mad/SubProcesses/MGVersion.txt index 9d3a5c0ba0..a806d3938c 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/MGVersion.txt +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/MGVersion.txt @@ -1 +1 @@ -3.5.3_lo_vect \ No newline at end of file +3.6.0_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc index 2ec087a2a6..b6fb333a80 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.h b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.h index fd18ce5fcd..d236db5397 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.h +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. 
//========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig.f index 0eaee07165..3c54117efb 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1134,11 +1134,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=16) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1148,32 +1149,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=16) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=16) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f index 67089678cd..252b6d4e2b 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -54,8 +54,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD DOUBLE PRECISION DUMMY_COMPONENTS(N_EE) - DOUBLE PRECISION EP1_COMPONENTS(N_EE) - DOUBLE PRECISION EM2_COMPONENTS(N_EE) + DOUBLE PRECISION EP1_COMPONENTS(N_EE ) + DOUBLE PRECISION EM2_COMPONENTS(N_EE ) INTEGER I_EE C @@ -104,6 +104,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -141,7 +143,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) QSCALE=DSQRT(Q2FACT(IB(1))) ENDIF EP1=PDG2PDF(LPP(IB(1)),-11, IB(1),XBK(IB(1)), QSCALE) - IF (PDLABEL.EQ.'dressed') EP1_COMPONENTS(1:4) = + IF (PDLABEL.EQ.'dressed') EP1_COMPONENTS(1:4 ) = $ EE_COMPONENTS(1:4) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN @@ -150,7 +152,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) QSCALE=DSQRT(Q2FACT(IB(2))) ENDIF EM2=PDG2PDF(LPP(IB(2)),11, IB(2),XBK(IB(2)), QSCALE) - IF (PDLABEL.EQ.'dressed') EM2_COMPONENTS(1:4) = + IF (PDLABEL.EQ.'dressed') EM2_COMPONENTS(1:4 ) = $ EE_COMPONENTS(1:4) ENDIF PD(0) = 0D0 @@ -226,7 +228,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -290,13 +292,13 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD DOUBLE PRECISION DUMMY_COMPONENTS(N_EE) - DOUBLE PRECISION EP1_COMPONENTS(N_EE) - DOUBLE PRECISION EM2_COMPONENTS(N_EE) + DOUBLE PRECISION EP1_COMPONENTS(N_EE , VECSIZE_MEMMAX) + DOUBLE PRECISION EM2_COMPONENTS(N_EE , VECSIZE_MEMMAX) INTEGER I_EE C @@ -332,9 +334,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -350,6 +353,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -381,11 +386,15 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C LP=SIGN(1,LPP(IB(1))) EP1(IVEC)=PDG2PDF(LPP(IB(1)),-11, IB(1),ALL_XBK(IB(1),IVEC) $ ,DSQRT(ALL_Q2FACT(IB(1), IVEC))) + IF (PDLABEL.EQ.'dressed') EP1_COMPONENTS(1:4 , IVEC) = + $ EE_COMPONENTS(1:4) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN C LP=SIGN(1,LPP(IB(2))) EM2(IVEC)=PDG2PDF(LPP(IB(2)),11, IB(2),ALL_XBK(IB(2),IVEC) $ ,DSQRT(ALL_Q2FACT(IB(2), IVEC))) + IF (PDLABEL.EQ.'dressed') EM2_COMPONENTS(1:4 , IVEC) = + $ EE_COMPONENTS(1:4) ENDIF ENDDO ! IWARP LOOP ENDDO ! CURRWARP LOOP @@ -394,6 +403,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, IPROC=IPROC+1 ! 
e+ e- > mu+ mu- DO IVEC=1, VECSIZE_USED ALL_PD(IPROC,IVEC)=EP1(IVEC)*EM2(IVEC) + IF (PDLABEL.EQ.'dressed')ALL_PD(IPROC,IVEC) + $ =EE_COMP_PROD(EP1_COMPONENTS(1,IVEC),EM2_COMPONENTS(1,IVEC)) ALL_PD(0,IVEC)=ALL_PD(0,IVEC)+DABS(ALL_PD(IPROC,IVEC)) ENDDO @@ -497,11 +508,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -601,9 +607,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -716,3 +724,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/driver.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/driver.f index 27a6e46742..ec5722702a 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/driver.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f index b1a73743d7..986be9742a 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -23,6 +23,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! 
defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=16) INTEGER NGRAPHS @@ -46,8 +48,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -56,26 +58,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -86,7 +85,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -96,26 +94,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,4) /-1, 1, 1,-1/ DATA (NHEL(I, 2),I=1,4) /-1, 1, 1, 1/ DATA (NHEL(I, 3),I=1,4) /-1, 1,-1,-1/ @@ -143,8 +140,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -153,11 +149,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=1 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=1 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -167,12 +163,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. 
NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) @@ -193,7 +188,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -222,35 +218,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. + NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -331,7 +325,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cluster.f b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cluster.f index 649e46f4e9..b8995283ed 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/cluster.f +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/cluster.f @@ -552,6 +552,8 @@ logical function cluster(p, ivec) if (btest(mlevel,1)) $ write (*,*)'New event' + iwin = 0 + jwin = 0 cluster=.false. clustered=.false. 
do i=0,3 @@ -663,7 +665,8 @@ logical function cluster(p, ivec) c initialize graph storage igraphs(0)=0 nleft=nexternal -c cluster +c cluster + if (iwin.eq.0.or.jwin.eq.0) stop 21 do n=1,nexternal-2 c combine winner imocl(n)=imap(iwin,2)+imap(jwin,2) diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/proc_characteristics b/epochX/cudacpp/ee_mumu.mad/SubProcesses/proc_characteristics index 95773bcf0c..c43d2203f9 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/proc_characteristics +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/proc_characteristics @@ -16,7 +16,9 @@ pdg_initial2 = [11] splitting_types = [] perturbation_order = [] - limitations = ['dressed_ee'] + limitations = [] + ew_sudakov = False hel_recycling = False single_color = True nlo_mixed_expansion = True + gauge = unitary diff --git a/epochX/cudacpp/ee_mumu.mad/SubProcesses/refine.sh b/epochX/cudacpp/ee_mumu.mad/SubProcesses/refine.sh index afb9b99ad1..b46170ba23 100644 --- a/epochX/cudacpp/ee_mumu.mad/SubProcesses/refine.sh +++ b/epochX/cudacpp/ee_mumu.mad/SubProcesses/refine.sh @@ -57,7 +57,11 @@ j=%(directory)s for((try=1;try<=16;try+=1)); do if [ "$keeplog" = true ] ; then + if [[ -e ../madevent ]];then ../madevent 2>&1 >> $k &1 >> $k &1 >> log.txt &1 >> log.txt &1 >> $k \w*)>') - pat_end=re.compile('\w*)>') + pat_begin=re.compile(r'<(?P\w*)>') + pat_end=re.compile(r'\w*)>') tag_to_file={'slha':'param_card.dat', 'mgruncard':'run_card.dat', @@ -319,7 +319,7 @@ def check_pid(self, pid2label): def get_lha_strategy(self): """get the lha_strategy: how the weight have to be handle by the shower""" - if not self["init"]: + if "init" not in self or not self["init"]: raise Exception("No init block define") data = self["init"].split('\n')[0].split() @@ -537,7 +537,8 @@ def charge_card(self, tag): self.param_card = param_card_reader.ParamCard(param_card) return self.param_card elif tag == 'mgruncard': - self.run_card = RunCard(self[tag], unknown_warning=False) + with misc.TMP_variable(RunCard, 'allow_scan', True): + self.run_card = RunCard(self[tag], consistency=False, unknow_warning=False) return self.run_card elif tag == 'mg5proccard': proc_card = self[tag].split('\n') @@ -976,6 +977,8 @@ class ConfigFile(dict): """ a class for storing/dealing with input file. """ + allow_scan = False + def __init__(self, finput=None, **opt): """initialize a new instance. input can be an instance of MadLoopParam, a file, a path to a file, or simply Nothing""" @@ -993,6 +996,7 @@ def __init__(self, finput=None, **opt): # Initialize it with all the default value self.user_set = set() self.auto_set = set() + self.scan_set = {} #key -> type of list (int/float/bool/str/... for scan self.system_only = set() self.lower_to_case = {} self.list_parameter = {} #key -> type of list (int/float/bool/str/... @@ -1109,6 +1113,8 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): #1. check if the parameter is set to auto -> pass it to special if lower_name in self: targettype = type(dict.__getitem__(self, lower_name)) + if lower_name in self.scan_set: + targettype = self.scan_set[lower_name] if targettype != str and isinstance(value, str) and value.lower() == 'auto': self.auto_set.add(lower_name) if lower_name in self.user_set: @@ -1118,13 +1124,26 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): return elif lower_name in self.auto_set: self.auto_set.remove(lower_name) - + + + #1. 
check if the parameter is set to auto -> pass it to special + scan_targettype = None + if self.allow_scan and isinstance(value, str) and value.strip().startswith('scan'): + if lower_name in self.user_set: + self.user_set.remove(lower_name) + self.scan_set[lower_name] = type(self[lower_name]) + dict.__setitem__(self, lower_name, value) + #keep old value. + self.post_set(lower_name,value, change_userdefine, raiseerror) + return + elif lower_name in self.scan_set: + scan_targettype = self.scan_set[lower_name] + del self.scan_set[lower_name] + # 2. Find the type of the attribute that we want if lower_name in self.list_parameter: targettype = self.list_parameter[lower_name] - - if isinstance(value, str): # split for each comma/space value = value.strip() @@ -1248,8 +1267,11 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): if change_userdefine: self.user_set.add(lower_name) return self.post_set(lower_name, None, change_userdefine, raiseerror) - elif name in self: - targettype = type(self[name]) + elif name in self: + if scan_targettype: + targettype = targettype + else: + targettype = type(self[name]) else: logger.debug('Trying to add argument %s in %s. ' % (name, self.__class__.__name__) +\ 'This argument is not defined by default. Please consider adding it.') @@ -1262,7 +1284,7 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): if change_userdefine: self.user_set.add(lower_name) return self.post_set(lower_name, None, change_userdefine, raiseerror) - + value = self.format_variable(value, targettype, name=name) #check that the value is allowed: if lower_name in self.allowed_value and '*' not in self.allowed_value[lower_name]: @@ -1444,7 +1466,7 @@ def format_variable(value, targettype, name="unknown"): value =int(value[:-1]) * convert[value[-1]] elif '/' in value or '*' in value: try: - split = re.split('(\*|/)',value) + split = re.split(r'(\*|/)',value) v = float(split[0]) for i in range((len(split)//2)): if split[2*i+1] == '*': @@ -1482,7 +1504,7 @@ def format_variable(value, targettype, name="unknown"): value = float(value) except ValueError: try: - split = re.split('(\*|/)',value) + split = re.split(r'(\*|/)',value) v = float(split[0]) for i in range((len(split)//2)): if split[2*i+1] == '*': @@ -1491,7 +1513,10 @@ def format_variable(value, targettype, name="unknown"): v /= float(split[2*i+2]) except: v=0 - raise InvalidCmd("%s can not be mapped to a float" % value) + if "scan" in value: + raise InvalidCmd("%s is not supported here. Note that scan command can not be present simultaneously in the run_card and param_card." 
% value) + else: + raise InvalidCmd("%s can not be mapped to a float" % value) finally: value = v else: @@ -1737,10 +1762,12 @@ def default_setup(self): self.add_param('splitting_types',[], typelist=str) self.add_param('perturbation_order', [], typelist=str) self.add_param('limitations', [], typelist=str) + self.add_param('ew_sudakov', False) self.add_param('hel_recycling', False) self.add_param('single_color', True) self.add_param('nlo_mixed_expansion', True) - + self.add_param('gauge', 'U') + def read(self, finput): """Read the input file, this can be a path to a file, a file object, a str with the content of the file.""" @@ -3104,7 +3131,7 @@ def write(self, output_file, template=None, python_template=False, # do not write hidden parameter not hidden for this template # if python_template: - written = written.union(set(re.findall('\%\((\w*)\)s', open(template,'r').read(), re.M))) + written = written.union(set(re.findall(r'\%\((\w*)\)s', open(template,'r').read(), re.M))) to_write = to_write.union(set(self.hidden_param)) to_write = to_write.difference(written) @@ -3157,7 +3184,6 @@ def get_value_from_include(self, path, list_of_params, output_dir): if path does not exists return the current value in self for all parameter""" #WARNING DOES NOT HANDLE LIST/DICT so far - misc.sprint(output_dir, path) # handle case where file is missing if not os.path.exists(pjoin(output_dir,path)): misc.sprint("include file not existing", pjoin(output_dir,path)) @@ -3259,7 +3285,7 @@ def edit_dummy_fct_from_file(self, filelist, outdir): text = open(path,'r').read() #misc.sprint(text) f77_type = ['real*8', 'integer', 'double precision', 'logical'] - pattern = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ + pattern = re.compile(r'^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ % {'type':'|'.join(f77_type)}, re.I+re.M) for fct in pattern.findall(text): fsock = file_writers.FortranWriter(tmp,'w') @@ -3287,6 +3313,7 @@ def edit_dummy_fct_from_file(self, filelist, outdir): starttext = open(pjoin(outdir, path+'.orig')).read() fsock.remove_routine(starttext, to_mod[path][0]) for text in to_mod[path][1]: + text = self.retro_compatible_custom_fct(text) fsock.writelines(text) fsock.close() if not filecmp.cmp(pjoin(outdir, path), pjoin(outdir, path+'.tmp')): @@ -3303,7 +3330,33 @@ def edit_dummy_fct_from_file(self, filelist, outdir): files.mv(pjoin(outdir,path+'.orig'), pjoin(outdir, path)) + @staticmethod + def retro_compatible_custom_fct(lines, mode=None): + f77_type = ['real*8', 'integer', 'double precision', 'logical'] + function_pat = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ + % {'type':'|'.join(f77_type)}, re.I+re.M) + include_pat = re.compile(r"\s+include\s+[\'\"]([\w\./]*)") + + assert isinstance(lines, list) + sol = [] + + if mode is None or 'vector.inc' in mode: + search = True + for i,line in enumerate(lines[:]): + if search and re.search(include_pat, line): + name = re.findall(include_pat, line)[0] + misc.sprint('DETECTED INCLUDE', name) + if 'vector.inc' in name: + search = False + if 'run.inc' in name: + sol.append(" include 'vector.inc'") + search = False + sol.append(line) + if re.search(function_pat, line): + misc.sprint("DETECTED FCT") + search = True + return sol def guess_entry_fromname(self, name, value): """ @@ -3346,7 +3399,7 @@ def update_typelist(value, name, opts): #handle metadata opts = {} forced_opts = [] - for key,val in re.findall("\<(?P[_\-\w]+)\=(?P[^>]*)\>", str(name)): + for key,val in 
re.findall(r"\<(?P[_\-\w]+)\=(?P[^>]*)\>", str(name)): forced_opts.append(key) if val in ['True', 'False']: opts[key] = eval(val) @@ -3681,11 +3734,22 @@ def write_autodef(self, output_dir, output_file=None): out = ["%s\n" %l for l in out] fsock.writelines(out) - @staticmethod - def get_idbmup(lpp): + def get_idbmup(self, lpp, beam=1): """return the particle colliding pdg code""" if lpp in (1,2, -1,-2): - return math.copysign(2212, lpp) + target = 2212 + if 'nb_proton1' in self: + nbp = self['nb_proton%s' % beam] + nbn = self['nb_neutron%s' % beam] + if nbp == 1 and nbn ==0: + target = 2212 + elif nbp==0 and nbn ==1: + target = 2112 + else: + target = 1000000000 + target += 10 * (nbp+nbn) + target += 10000 * nbp + return math.copysign(target, lpp) elif lpp in (3,-3): return math.copysign(11, lpp) elif lpp in (4,-4): @@ -3701,8 +3765,8 @@ def get_banner_init_information(self): the first line of the block of the lhe file.""" output = {} - output["idbmup1"] = self.get_idbmup(self['lpp1']) - output["idbmup2"] = self.get_idbmup(self['lpp2']) + output["idbmup1"] = self.get_idbmup(self['lpp1'], beam=1) + output["idbmup2"] = self.get_idbmup(self['lpp2'], beam=2) output["ebmup1"] = self["ebeam1"] output["ebmup2"] = self["ebeam2"] output["pdfgup1"] = 0 @@ -3959,7 +4023,8 @@ def check_validity(self, card): dict.__setitem__(card, 'pdlabel1', card['pdlabel']) dict.__setitem__(card, 'pdlabel2', card['pdlabel']) - if abs(card['lpp1']) == 1 == abs(card['lpp2']) and card['pdlabel1'] != card['pdlabel2']: + if isinstance(card['lpp1'],int) and isinstance(card['lpp2'],int) and \ + abs(card['lpp1']) == 1 == abs(card['lpp2']) and card['pdlabel1'] != card['pdlabel2']: raise InvalidRunCard("Assymetric beam pdf not supported for proton-proton collision") def status(self, card): @@ -4156,12 +4221,16 @@ def default_setup(self): self.add_param('frame_id', 6, system=True) self.add_param("event_norm", "average", allowed=['sum','average', 'unity'], include=False, sys_default='sum', hidden=True) + self.add_param("keep_log", "normal", include=False, hidden=True, + comment="none: all log send to /dev/null.\n minimal: keep only log for survey of the last run.\n normal: keep only log for survey of all run. \n debug: keep all log (survey and refine)", + allowed=['none', 'minimal', 'normal', 'debug']) #cut self.add_param("auto_ptj_mjj", True, hidden=True) self.add_param("bwcutoff", 15.0) self.add_param("cut_decays", False, cut='d') self.add_param('dsqrt_shat',0., cut=True) self.add_param("nhel", 0, include=False) + self.add_param("limhel", 1e-8, hidden=True, comment="threshold to determine if an helicity contributes when not MC over helicity.") #pt cut self.add_param("ptj", 20.0, cut='j') self.add_param("ptb", 0.0, cut='b') @@ -4842,7 +4911,7 @@ def create_default_for_process(self, proc_characteristic, history, proc_def): # here pick strategy 2 if only one QCD color flow # and for pure multi-jet case jet_id = [21] + list(range(1, self['maxjetflavor']+1)) - if proc_characteristic['single_color']: + if proc_characteristic['gauge'] != 'FD' and proc_characteristic['single_color']: self['sde_strategy'] = 2 #for pure lepton final state go back to sde_strategy=1 pure_lepton=True @@ -5741,9 +5810,10 @@ def check_validity(self): # check that ebeam is bigger than the proton mass. 
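Aside: the ion PDG code assembled in the new get_idbmup above follows the standard 10LZZZAAAI nuclear numbering, 1000000000 + 10000*Z + 10*A, with Z the number of protons and A the mass number. Below is a minimal, self-contained sketch of the same arithmetic; the function name and inputs are hypothetical, and only the PDG convention and the logic shown in the diff are assumed:

    import math

    def ion_pdg_code(nb_proton, nb_neutron, lpp):
        # single nucleons keep their ordinary codes
        if (nb_proton, nb_neutron) == (1, 0):
            target = 2212                               # proton
        elif (nb_proton, nb_neutron) == (0, 1):
            target = 2112                               # neutron
        else:
            target = 1000000000                         # 10LZZZAAAI offset
            target += 10000 * nb_proton                 # ZZZ field
            target += 10 * (nb_proton + nb_neutron)     # AAA field
        return int(math.copysign(target, lpp))          # sign follows the beam

    assert ion_pdg_code(82, 126, 1) == 1000822080       # lead-208
    assert ion_pdg_code(1, 0, -1) == -2212              # antiproton beam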
for i in [1,2]:
-            if self['lpp%s' % i ] not in [1,2]:
+            # do not check for the proton mass if not a proton PDF (or when initializing a scan)
+            if self['lpp%s' % i ] not in [1,2] or isinstance(self['ebeam%i' % i], str):
                 continue
-
+
             if self['ebeam%i' % i] < 0.938:
                 if self['ebeam%i' %i] == 0:
                     logger.warning("At-rest proton mode set: energy beam set to 0.938 GeV")
@@ -6193,3 +6263,181 @@ def log_and_update(self, banner, card, par, v):
         xcard = banner.charge_card(card)
         xcard[par[0]].param_dict[(par[1],)].value = v
         xcard.write(os.path.join(self.me_dir, 'Cards', '%s.dat' % card))
+
+
+
+
+class RunCardIterator(object):
+    """A class keeping track of the scan: flag in the run_card and
+    having an __iter__() function to scan over all the points of the scan.
+    """
+
+    logging = True
+    def __init__(self, input_path=None):
+        with misc.TMP_variable(RunCard, 'allow_scan', True):
+            self.run_card = RunCard(input_path, consistency=False)
+            self.run_card.allow_scan = True
+
+        self.itertag = [] # all the current values in use
+        self.cross = []   # keep track of all the cross-sections computed
+        self.param_order = []
+
+    def __iter__(self):
+        """generate the next run_card (in an abstract way) related to the scan.
+        Technically this generates only the generator."""
+
+        if hasattr(self, 'iterator'):
+            return self.iterator
+        self.iterator = self.iterate()
+        return self.iterator
+
+    def write(self, path):
+        self.run_card.write(path)
+
+    def next(self, autostart=False):
+        """call the next iteration value"""
+        try:
+            iterator = self.iterator
+        except:
+            if autostart:
+                iterator = self.__iter__()
+            else:
+                raise
+        try:
+            out = next(iterator)
+        except StopIteration:
+            del self.iterator
+            raise
+        return out
+
+    def iterate(self):
+        """create the actual generator"""
+        all_iterators = {} # dictionary of key -> block of objects to scan [(param, [values]), ...]
+        pattern = re.compile(r'''scan\s*(?P<id>\d*)\s*:\s*(?P<value>[^#]*)''', re.I)
+
+        # fill all_iterators with the run_card information
+        for name in self.run_card.scan_set:
+            value = self.run_card[name]
+            try:
+                key, def_list = pattern.findall(value)[0]
+            except Exception as error:
+                misc.sprint(error)
+                raise Exception("Fail to handle scanning tag in run_card: Please check that the syntax is valid")
+            if key == '':
+                key = -1 * len(all_iterators)
+            if key not in all_iterators:
+                all_iterators[key] = []
+            try:
+                all_iterators[key].append( (name, eval(def_list)) )
+            except SyntaxError as error:
+                raise Exception("Fail to handle your scan definition. Please check your syntax:\n entry: %s \n Error reported: %s" % (def_list, error))
+
+        # prepare to keep track of the parameters changing for the report
+        keys = list(all_iterators.keys()) # need to fix an order for the scan
+        # store the type of parameter
+        for key in keys:
+            for param, values in all_iterators[key]:
+                self.param_order.append("run_card#%s" % (param))
+
+        # do the loop
+        lengths = [list(range(len(all_iterators[key][0][1]))) for key in keys]
+        from functools import reduce
+        total = reduce((lambda x, y: x * y), [len(x) for x in lengths])
+        for i, positions in enumerate(itertools.product(*lengths)):
+            self.itertag = []
+            if self.logging:
+                logger.info("Create the next run_card in the scan definition (%s/%s) " % (i+1, total), '$MG:BOLD')
+            for i, pos in enumerate(positions):
+                key = keys[i]
+                for param, values in all_iterators[key]:
+                    # assign the value in the card
+                    self.run_card[param] = values[pos]
+                    self.itertag.append(values[pos])
+                    if self.logging:
+                        logger.info("change parameter %s to %s", \
+                                    param, values[pos])
+
+            # return the current run_card up to the next iteration
+            yield self.run_card
+
+
+    def store_entry(self, run_name, cross, error=None, run_card_path=None):
+        """store the value of the cross-section"""
+
+        if isinstance(cross, dict):
+            info = dict(cross)
+            info.update({'bench' : self.itertag, 'run_name': run_name})
+            self.cross.append(info)
+        else:
+            if error is None:
+                self.cross.append({'bench' : self.itertag, 'run_name': run_name, 'cross(pb)': cross})
+            else:
+                self.cross.append({'bench' : self.itertag, 'run_name': run_name, 'cross(pb)': cross, 'error(pb)': error})
+
+
+    def write_summary(self, path, order=None, lastline=False, nbcol=20):
+        """ """
+
+        if path:
+            ff = open(path, 'w')
+            path_events = path.rsplit("/", 1)[0]
+            #identCard = open(pjoin(path.rsplit("/", 2)[0], "Cards", "ident_card.dat"))
+            #identLines = identCard.readlines()
+            #identCard.close()
+        else:
+            ff = StringIO.StringIO()
+        if order:
+            keys = order
+        else:
+            keys = list(self.cross[0].keys())
+            if 'bench' in keys: keys.remove('bench')
+            if 'run_name' in keys: keys.remove('run_name')
+            keys.sort()
+            if 'cross(pb)' in keys:
+                keys.remove('cross(pb)')
+                keys.append('cross(pb)')
+            if 'error(pb)' in keys:
+                keys.remove('error(pb)')
+                keys.append('error(pb)')
+
+        formatting = "#%s%s%s\n" % ('%%-%is ' % (nbcol-1), ('%%-%is ' % (nbcol)) * len(self.param_order),
+                                    ('%%-%is ' % (nbcol)) * len(keys))
+        # header
+        if not lastline:
+            ff.write(formatting % tuple(['run_name'] + self.param_order + keys))
+        formatting = "%s%s%s\n" % ('%%-%is ' % (nbcol), ('%%-%ie ' % (nbcol)) * len(self.param_order),
+                                   ('%%-%ie ' % (nbcol)) * len(keys))
+
+        if not lastline:
+            to_print = self.cross
+        else:
+            to_print = self.cross[-1:]
+        for info in to_print:
+            name = info['run_name']
+            bench = info['bench']
+            data = []
+            for k in keys:
+                if k in info:
+                    data.append(info[k])
+                else:
+                    data.append(0.)
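For context, the run_card scan grammar mirrors the long-standing param_card one: 'scan:[...]' declares an independent scan, while 'scanN:[...]' ties every parameter sharing the index N to the same iteration counter. A hedged sketch of the parsing step (using positional groups, which is how the code above unpacks the match anyway):

    import re, itertools

    scan_pat = re.compile(r'scan\s*(\d*)\s*:\s*([^#]*)', re.I)

    key, def_list = scan_pat.findall('scan1: [1000, 2000]')[0]
    values = eval(def_list)          # the card code eval()s the bracketed list
    assert (key, values) == ('1', [1000, 2000])

    # independent scans combine as a cartesian product, as in iterate() above
    for nevents, ebeam in itertools.product([1000, 2000], [6500., 7000.]):
        pass                         # one run_card is produced per combination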
+ ff.write(formatting % tuple([name] + bench + data)) + ff_single = open(pjoin(path_events, name, "params.dat"), "w") + for i_bench in range(0, len(bench)): + ff_single.write(self.param_order[i_bench] + " = " + str(bench[i_bench]) +"\n") + ff_single.close() + + if not path: + return ff.getvalue() + + + def get_next_name(self, run_name): + """returns a smart name for the next run""" + + if '_' in run_name: + name, value = run_name.rsplit('_',1) + if value.isdigit(): + return '%s_%02i' % (name, float(value)+1) + # no valid '_' in the name + return '%s_scan_02' % run_name diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/check_param_card.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/check_param_card.py index 71089d7480..bc785b5de6 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/check_param_card.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/check_param_card.py @@ -649,7 +649,7 @@ def write_inc_file(self, outpath, identpath, default, need_mp=False): #check if we need to write the value of scale for some block if os.path.exists(input_inc): text = open(input_inc).read() - scales = list(set(re.findall('mdl__(\w*)__scale', text, re.I))) + scales = list(set(re.findall(r'mdl__(\w*)__scale', text, re.I))) else: scales = [] @@ -1000,10 +1000,12 @@ def iterate(self): self.param_order.append("%s#%s" % (param.lhablock, '_'.join(repr(i) for i in param.lhacode))) # do the loop lengths = [list(range(len(all_iterators[key][0][1]))) for key in keys] - for positions in itertools.product(*lengths): + from functools import reduce + total = reduce((lambda x, y: x * y),[len(x) for x in lengths]) + for i,positions in enumerate(itertools.product(*lengths)): self.itertag = [] if self.logging: - logger.info("Create the next param_card in the scan definition", '$MG:BOLD') + logger.info("Create the next param_card in the scan definition (%s/%s)" % (i+1,total), '$MG:BOLD') for i, pos in enumerate(positions): key = keys[i] for param, values in all_iterators[key]: diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/cluster.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/cluster.py index 9a893f630d..1ad860e04f 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/cluster.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/cluster.py @@ -646,7 +646,10 @@ def worker(self): if os.path.exists(exe) and not exe.startswith('/'): exe = './' + exe if isinstance(opt['stdout'],str): - opt['stdout'] = open(opt['stdout'],'w') + if opt['stdout'] == '/dev/null': + opt['stdout'] = os.open(os.devnull, os.O_RDWR) + else: + opt['stdout'] = open(opt['stdout'],'w') if opt['stderr'] == None: opt['stderr'] = subprocess.STDOUT if arg: @@ -671,11 +674,12 @@ def worker(self): self.pids.put(pid) # the function should return 0 if everything is fine # the error message otherwise - returncode = exe(*arg, **opt) - if returncode != 0: - logger.warning("fct %s does not return 0. Stopping the code in a clean way. The error was:\n%s", exe, returncode) + try: + returncode = exe(*arg, **opt) + except Exception as error: + #logger.warning("fct %s does not return 0. Stopping the code in a clean way. 
The error was:\n%s", exe, returncode)
                 self.stoprequest.set()
-                self.remove("fct %s does not return 0:\n %s" % (exe, returncode))
+                self.remove("fct %s raised an exception:\n %s" % (exe, error))
         except Exception as error:
             self.fail_msg = sys.exc_info()
             logger.warning(str(error))
@@ -700,7 +704,7 @@ def worker(self):

     def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None,
-               log=None, required_output=[], nb_submit=0):
+               log=None, required_output=[], nb_submit=0, python_opts={}):
         """submit a job on multicore machine"""

         # open threads if needed
@@ -720,7 +724,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None,
             return tag
         else:
             # python function
-            self.queue.put((tag, prog, argument, {}))
+            self.queue.put((tag, prog, argument, python_opts))
             self.submitted.put(1)
             return tag
@@ -908,6 +912,10 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None
         else:
             requirement = ''

+        if 'cluster_walltime' in self.options and self.options['cluster_walltime']\
+           and self.options['cluster_walltime'] != 'None':
+            requirement+='\n MaxRuntime = %s' % self.options['cluster_walltime']
+
         if cwd is None:
             cwd = os.getcwd()
         if stdout is None:
@@ -936,7 +944,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None
         #Submitting job(s).
         #Logging submit event(s).
         #1 job(s) submitted to cluster 2253622.
-        pat = re.compile("submitted to cluster (\d*)",re.MULTILINE)
+        pat = re.compile(r"submitted to cluster (\d*)",re.MULTILINE)
         output = output.decode(errors='ignore')
         try:
             id = pat.search(output).groups()[0]
@@ -1025,7 +1033,7 @@ def submit2(self, prog, argument=[], cwd=None, stdout=None, stderr=None,
         #Logging submit event(s).
         #1 job(s) submitted to cluster 2253622.
         output = output.decode(errors='ignore')
-        pat = re.compile("submitted to cluster (\d*)",re.MULTILINE)
+        pat = re.compile(r"submitted to cluster (\d*)",re.MULTILINE)
         try:
             id = pat.search(output).groups()[0]
         except:
@@ -1588,7 +1596,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None
         output = a.communicate()[0].decode(errors='ignore')
         #Your job 874511 ("test.sh") has been submitted
-        pat = re.compile("Your job (\d*) \(",re.MULTILINE)
+        pat = re.compile(r"Your job (\d*) \(",re.MULTILINE)
         try:
             id = pat.search(output).groups()[0]
         except:
@@ -1606,7 +1614,7 @@ def control_one_job(self, id):
         if not status:
             return 'F'
         #874516 0.00000 test.sh alwall qw 03/04/2012 22:30:35 1
-        pat = re.compile("^(\d+)\s+[\d\.]+\s+[\w\d\.]+\s+[\w\d\.]+\s+(\w+)\s")
+        pat = re.compile(r"^(\d+)\s+[\d\.]+\s+[\w\d\.]+\s+[\w\d\.]+\s+(\w+)\s")
         stat = ''
         for line in status.stdout.read().decode(errors='ignore').split('\n'):
             if not line:
@@ -1636,7 +1644,7 @@ def control(self, me_dir=None):
             cmd = 'qstat -s %s' % statusflag
             status = misc.Popen([cmd], shell=True, stdout=subprocess.PIPE)
             #874516 0.00000 test.sh alwall qw 03/04/2012 22:30:35 1
-            pat = re.compile("^(\d+)")
+            pat = re.compile(r"^(\d+)")
             for line in status.stdout.read().decode(errors='ignore').split('\n'):
                 line = line.strip()
                 try:
@@ -1715,6 +1723,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None
             stderr = stdout
         if log is None:
             log = '/dev/null'
+
         command = ['sbatch', '-o', stdout,
                    '-J', me_dir,
@@ -1726,6 +1735,12 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None
             command.insert(1, '-p')
             command.insert(2, self.cluster_queue)

+        if 'cluster_walltime' in self.options and self.options['cluster_walltime']\
+           and self.options['cluster_walltime'] != 'None':
+
command.insert(1, '-t') + command.insert(2, self.options['cluster_walltime']) + + a = misc.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, @@ -1736,7 +1751,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None id = output_arr[3].rstrip() if not id.isdigit(): - id = re.findall('Submitted batch job ([\d\.]+)', ' '.join(output_arr)) + id = re.findall(r'Submitted batch job ([\d\.]+)', ' '.join(output_arr)) if not id or len(id)>1: raise ClusterManagmentError( 'fail to submit to the cluster: \n%s' \ diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/combine_runs.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/combine_runs.py index 4de6b84ec0..b1e8c88eac 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/combine_runs.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/combine_runs.py @@ -20,6 +20,7 @@ from __future__ import absolute_import import math import os +import shutil import re import logging from six.moves import range @@ -117,6 +118,7 @@ def sum_multichannel(self, channel): #Now read in all of the events and write them #back out with the appropriate scaled weight + to_clean = [] fsock = open(pjoin(channel, 'events.lhe'), 'w') wgt = results.axsec / results.nunwgt tot_nevents, nb_file = 0, 0 @@ -129,8 +131,14 @@ def sum_multichannel(self, channel): nw = self.copy_events(fsock, pjoin(path,'events.lhe'), wgt) tot_nevents += nw nb_file += 1 + to_clean.append(path) logger.debug("Combined %s file generating %s events for %s " , nb_file, tot_nevents, channel) - + for path in to_clean: + try: + shutil.rmtree(path) + except Exception as error: + pass + @staticmethod def get_fortran_str(nb): data = '%E' % nb @@ -162,6 +170,7 @@ def copy_events(self, fsock, input, new_wgt): fsock.write(line) old_line = line return nb_evt + def get_channels(self, proc_path): """Opens file symfact.dat to determine all channels""" sympath = os.path.join(proc_path, 'symfact.dat') diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/common_run_interface.py index 9bd9d9cb50..194f0cdfbd 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/common_run_interface.py @@ -23,6 +23,7 @@ import ast import logging import math +import copy import os import re import shutil @@ -181,6 +182,23 @@ def help_add_time_of_flight(self): logger.info(' threshold option allows to change the minimal value required to') logger.info(' a non zero value for the particle (default:1e-12s)') + def help_print_results(self): + logger.info("syntax: print_results [RUN_NAME] [OPTIONS]") + logger.info("-- print the results of the previous run on the screen") + logger.info(" If not RUN_NAME is provided, the information of all run") + logger.info(" are printed one after another.") + logger.info("") + logger.info(" supported options:") + logger.info(" ------------------") + logger.info(" --format=full|short # default is full") + logger.info(" full format contains banner/... 
") + logger.info(" while short is a simple multi-column format (nice for plotting)") + logger.info(" --path=") + logger.info(" allow to write the information to a file.") + logger.info(" --mode=w|a #default is w ") + logger.info(" when the information is printed to a file, you can choose ") + logger.info(" to either overwrite the file if already exists (w mode)") + logger.info(" to append the information at the end of the file (a mode)") class CheckValidForCmd(object): @@ -727,7 +745,7 @@ def __init__(self, me_dir, options, *args, **opts): if not self.proc_characteristics['ninitial']: # Get number of initial states nexternal = open(pjoin(self.me_dir,'Source','nexternal.inc')).read() - found = re.search("PARAMETER\s*\(NINCOMING=(\d)\)", nexternal) + found = re.search(r"PARAMETER\s*\(NINCOMING=(\d)\)", nexternal) self.ninitial = int(found.group(1)) else: self.ninitial = self.proc_characteristics['ninitial'] @@ -989,7 +1007,22 @@ def do_treatcards(self, line, amcatnlo=False): #raise Exception, "%s %s %s" % (sys.path, os.path.exists(pjoin(self.me_dir,'bin','internal', 'ufomodel')), os.listdir(pjoin(self.me_dir,'bin','internal', 'ufomodel'))) import ufomodel as ufomodel zero = ufomodel.parameters.ZERO - if self.proc_characteristics['nlo_mixed_expansion']: + no_width = [] + + if self.proc_characteristics['ew_sudakov']: + # if the sudakov approximation is used, force all particle widths to zero + # unless the complex mass scheme is used + if not self.proc_characteristics['complex_mass_scheme']: + no_width = [p for p in ufomodel.all_particles if p.width != zero] + logger.info('''Setting all particle widths to zero (needed for EW Sudakov approximation).''','$MG:BOLD') + # also, check that the model features the 'ntadpole' parameter, and set it to 1 + try: + param_card['tadpole'].get(1).value = 1. + logger.info('''Setting the value of ntadpole to 1 (needed for EW Sudakov approximation).''','$MG:BOLD') + except KeyError: + logger.warning('''The model has no 'ntadpole' parameter. 
The Sudakov approximation for EW corrections may give wrong results.''') + + elif self.proc_characteristics['nlo_mixed_expansion']: no_width = [p for p in ufomodel.all_particles if (str(p.pdg_code) in pids or str(-p.pdg_code) in pids) and p.width != zero] @@ -1168,17 +1201,17 @@ def detect_card_type(path): 'Begin Minpts', 'gridpack', 'ebeam1', - 'block\s+mw_run', + r'block\s+mw_run', 'BLOCK', 'DECAY', 'launch', 'madspin', - 'transfer_card\.dat', + r'transfer_card\.dat', 'set', 'main:numberofevents', # pythia8, '@MG5aMC skip_analysis', #MA5 --both-- - '@MG5aMC\s*inputs\s*=\s*\*\.(?:hepmc|lhe)', #MA5 --both-- - '@MG5aMC\s*reconstruction_name', # MA5 hadronique + r'@MG5aMC\s*inputs\s*=\s*\*\.(?:hepmc|lhe)', #MA5 --both-- + r'@MG5aMC\s*reconstruction_name', # MA5 hadronique '@MG5aMC', # MA5 hadronique 'run_rivet_later', # Rivet ] @@ -1237,7 +1270,7 @@ def detect_card_type(path): return 'madspin_card.dat' if 'decay' in text: # need to check if this a line like "decay w+" or "set decay" - if re.search("(^|;)\s*decay", fulltext, re.M): + if re.search(r"(^|;)\s*decay", fulltext, re.M): return 'madspin_card.dat' else: return 'reweight_card.dat' @@ -2074,6 +2107,12 @@ def check_multicore(self): # ensure that the run_card is present if not hasattr(self, 'run_card'): self.run_card = banner_mod.RunCard(pjoin(self.me_dir, 'Cards', 'run_card.dat')) + # Below reads the run_card in the LHE file rather than the Cards/run_card + import madgraph.various.lhe_parser as lhe_parser + args_path = list(args) + self.check_decay_events(args_path) + self.run_card = banner_mod.RunCard(lhe_parser.EventFile(args_path[0]).get_banner()['mgruncard']) + # we want to run this in a separate shell to avoid hard f2py crash command = [sys.executable] @@ -2085,6 +2124,12 @@ def check_multicore(self): command.append('--web') command.append('reweight') + ## TV: copy the event file as backup before starting reweighting + event_path = pjoin(self.me_dir, 'Events', self.run_name, 'events.lhe.gz') + event_path_backup = pjoin(self.me_dir, 'Events', self.run_name, 'events_orig.lhe.gz') + if os.path.exists(event_path) and not os.path.exists(event_path_backup): + shutil.copyfile(event_path, event_path_backup) + ######### START SINGLE CORE MODE ############ if self.options['nb_core']==1 or self.run_card['nevents'] < 101 or not check_multicore(self): if self.run_name: @@ -2200,7 +2245,7 @@ def check_multicore(self): cross_sections[key] = value / (nb_event+1) lhe.remove() for key in cross_sections: - if key == 'orig' or key.isdigit(): + if key == 'orig' or (key.isdigit() and not (key[0] == '2')): continue logger.info('%s : %s pb' % (key, cross_sections[key])) return @@ -2461,7 +2506,7 @@ def do_add_time_of_flight(self, line): ############################################################################ def do_print_results(self, line): - """Not in help:Print the cross-section/ number of events for a given run""" + """Print the cross-section/ number of events for a given run""" args = self.split_arg(line) options={'path':None, 'mode':'w', 'format':'full'} @@ -2942,10 +2987,15 @@ def do_rivet(self, line, postprocess=False): #2 Prepare Rivet setup environments rivet_path = self.options['rivet_path'] yoda_path = self.options['yoda_path'] + fastjet_path = subprocess.Popen([self.options['fastjet'], '--prefix'], + stdout = subprocess.PIPE).stdout.read().decode(errors='ignore').strip() + set_env = set_env + "export PATH={0}:$PATH\n".format(pjoin(rivet_path, 'bin')) set_env = set_env + "export PATH={0}:$PATH\n".format(pjoin(yoda_path, 'bin')) set_env = 
set_env + "export LD_LIBRARY_PATH={0}:{1}:$LD_LIBRARY_PATH\n".format(pjoin(rivet_path, 'lib'), pjoin(rivet_path, 'lib64')) set_env = set_env + "export LD_LIBRARY_PATH={0}:{1}:$LD_LIBRARY_PATH\n".format(pjoin(yoda_path, 'lib'), pjoin(yoda_path, 'lib64')) + set_env = set_env + "export LD_LIBRARY_PATH={0}:$LD_LIBRARY_PATH\n".format(pjoin(self.options['hepmc_path'], 'lib')) + set_env = set_env + "export LD_LIBRARY_PATH={0}:$LD_LIBRARY_PATH\n".format(pjoin(fastjet_path, 'lib')) major, minor = sys.version_info[0:2] set_env = set_env + "export PYTHONPATH={0}:{1}:$PYTHONPATH\n".format(pjoin(rivet_path, 'lib', 'python%s.%s' %(major,minor), 'site-packages'),\ pjoin(rivet_path, 'lib64', 'python%s.%s' %(major,minor), 'site-packages')) @@ -4311,8 +4361,8 @@ def complete_compute_widths(self, text, line, begidx, endidx, formatting=True): else: completion = {} completion['options'] = self.list_completion(text, - ['--path=', '--output=', '--min_br=0.\$', '--nlo', - '--precision_channel=0.\$', '--body_decay=']) + ['--path=', '--output=', r'--min_br=0.\$', '--nlo', + r'--precision_channel=0.\$', '--body_decay=']) return self.deal_multiple_categories(completion, formatting) @@ -4821,9 +4871,9 @@ class AskforEditCard(cmd.OneLinePathCompletion): (return False to repeat the question) """ - all_card_name = ['param_card', 'run_card', 'pythia_card', 'pythia8_card', + all_card_name = ['param_card', 'run_card', 'pythia_card', 'pythia8_card', 'fo_analysis_card' 'madweight_card', 'MadLoopParams', 'shower_card', 'rivet_card'] - to_init_card = ['param', 'run', 'madweight', 'madloop', + to_init_card = ['param', 'run', 'madweight', 'madloop', 'fo_analysis', 'shower', 'pythia8','delphes','madspin', 'rivet'] special_shortcut = {} special_shortcut_help = {} @@ -4853,6 +4903,7 @@ def load_default(self): self.has_PY8 = False self.has_delphes = False self.has_rivet = False + self.has_fo_card = False self.paths = {} self.update_block = [] @@ -5058,7 +5109,8 @@ def init_run(self, cards): try: - self.run_card = banner_mod.RunCard(self.paths['run'], consistency='warning') + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + self.run_card = banner_mod.RunCard(self.paths['run'], consistency='warning') except IOError: self.run_card = {} try: @@ -5248,6 +5300,15 @@ def init_delphes(self, cards): self.has_delphes = True return [] + def init_fo_analysis(self, cards): + self.has_fo_card = False + if not self.get_path('FO_analyse', cards): + return [] + self.has_fo_card = True + self.fo_card = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse']) + self.fo_card_def = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse_default']) + return list(self.fo_card.string_vars) + def set_CM_velocity(self, line): """compute sqrts from the velocity in the center of mass frame""" @@ -5508,6 +5569,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed['delphes_card'] = '' if self.has_rivet: allowed['rivet_card'] = '' + if self.has_fo_card: + allowed['fo_card'] = '' elif len(args) == 2: if args[1] == 'run_card': @@ -5532,6 +5595,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed = {'delphes_card':'default'} elif args[1] == 'rivet_card': allowed = {'rivet_card':'default'} + elif args[1] == 'fo_card': + allowed = {'fo_card':'default'} else: allowed = {'value':''} @@ -5539,6 +5604,7 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): start = 1 if args[1] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', 
'MadLoop_card','pythia8_card','delphes_card','plot_card', + 'fo_card', 'madanalysis5_parton_card','madanalysis5_hadron_card', 'rivet_card']: start = 2 @@ -5576,6 +5642,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): categories.append('delphes_card') if self.has_rivet: categories.append('rivet_card') + if self.has_fo_card: + categories.append('fo_card') possibilities['category of parameter (optional)'] = \ self.list_completion(text, categories) @@ -5630,7 +5698,13 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): if 'delphes_card' in allowed: if allowed['delphes_card'] == 'default': opts = ['default', 'atlas', 'cms'] - possibilities['Delphes Card'] = self.list_completion(text, opts) + possibilities['Delphes Card'] = self.list_completion(text, opts) + + if 'fo_card' in allowed: + opts = self.fo_card.string_vars + if allowed['fo_card'] == 'default': + opts.append('default') + possibilities['FO Card'] = self.list_completion(text, opts) if 'value' in list(allowed.keys()): opts = ['default', 'scale'] @@ -5733,21 +5807,28 @@ def do_set(self, line): if args[0] in self.special_shortcut: targettypes , cmd = self.special_shortcut[args[0]] if len(args) != len(targettypes) +1: - logger.warning('shortcut %s requires %s argument' % (args[0], len(targettypes))) - if len(args) < len(targettypes) +1: - return + if len(targettypes) == 1 and args[len(targettypes)].startswith('scan'): + args = args[:len(targettypes)] + [' '.join(args[len(targettypes):])] + targettypes = [str] else: - logger.warning('additional argument will be ignored') + logger.warning('shortcut %s requires %s argument' % (args[0], len(targettypes))) + if len(args) < len(targettypes) +1: + return + else: + logger.warning('additional argument will be ignored') values ={} for i, argtype in enumerate(targettypes): try: - values = {str(i): banner_mod.ConfigFile.format_variable(args[i+1], argtype, args[0])} + values[str(i)] = banner_mod.ConfigFile.format_variable(args[i+1], argtype, args[0]) except ValueError as e: logger.warning("Wrong argument: The entry #%s should be of type %s.", i+1, argtype) return except InvalidCmd as e: - logger.warning(str(e)) - return + if isinstance(args[i+1], str) and args[i+1].startswith('scan'): + values[str(i)] = args[i+1] + else: + logger.warning(str(e)) + return #else: # logger.warning("too many argument for this command") # return @@ -5787,7 +5868,7 @@ def do_set(self, line): if os.path.exists(pythia_path): logger.info('add line QCUT = %s in pythia_card.dat' % args[1]) p_card = open(pythia_path,'r').read() - p_card, n = re.subn('''^\s*QCUT\s*=\s*[\de\+\-\.]*\s*$''', + p_card, n = re.subn(r'''^\s*QCUT\s*=\s*[\de\+\-\.]*\s*$''', ''' QCUT = %s ''' % args[1], \ p_card, flags=(re.M+re.I)) if n==0: @@ -5801,7 +5882,7 @@ def do_set(self, line): if os.path.exists(pythia_path): logger.info('add line SHOWERKT = %s in pythia_card.dat' % args[1].upper()) p_card = open(pythia_path,'r').read() - p_card, n = re.subn('''^\s*SHOWERKT\s*=\s*[default\de\+\-\.]*\s*$''', + p_card, n = re.subn(r'''^\s*SHOWERKT\s*=\s*[default\de\+\-\.]*\s*$''', ''' SHOWERKT = %s ''' % args[1].upper(), \ p_card, flags=(re.M+re.I)) if n==0: @@ -5856,7 +5937,7 @@ def do_set(self, line): pjoin(self.me_dir,'Cards', 'delphes_card.dat')) return - if args[0] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', + if args[0] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', 'fo_card', 'delphes_card','madanalysis5_hadron_card','madanalysis5_parton_card','rivet_card']: if args[1] == 'default': 
@@ -6176,6 +6257,22 @@ def do_set(self, line): self.setRivet(args[start], value, default=default) self.rivet_card.write(self.paths['rivet'], self.paths['rivet_default']) + elif self.has_fo_card and (card in ['', 'fo_card'])\ + and args[start].lower() in [k.lower() for k in self.fo_card.string_vars]: + + if args[start] in self.conflict and card == '': + text = 'ambiguous name (present in more than one card). Please specify which card to edit' + logger.warning(text) + return + if args[start+1] == 'default': + value = self.fo_card_default[args[start]] + default = True + else: + value = args[start+1] + default = False + self.fo_card[args[start]] = value + self.modified_card.add('fo_card') + #INVALID -------------------------------------------------------------- else: logger.warning('invalid set command %s ' % line) @@ -6222,12 +6319,13 @@ def setM(self, block, name, value): def setR(self, name, value): - if self.mother_interface.inputfile: - self.run_card.set(name, value, user=True, raiseerror=True) - else: - self.run_card.set(name, value, user=True) - new_value = self.run_card.get(name) - logger.info('modify parameter %s of the run_card.dat to %s' % (name, new_value),'$MG:BOLD') + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + if self.mother_interface.inputfile: + self.run_card.set(name, value, user=True, raiseerror=True) + else: + self.run_card.set(name, value, user=True) + new_value = self.run_card.get(name) + logger.info('modify parameter %s of the run_card.dat to %s' % (name, new_value),'$MG:BOLD') def setML(self, name, value, default=False): @@ -6314,6 +6412,7 @@ def check_card_consistency(self): proc_charac = self.mother_interface.proc_characteristics if proc_charac['grouped_matrix'] and \ + isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int) and \ abs(self.run_card['lpp1']) == 1 == abs(self.run_card['lpp2']) and \ (self.run_card['nb_proton1'] != self.run_card['nb_proton2'] or self.run_card['nb_neutron1'] != self.run_card['nb_neutron2'] or @@ -6403,41 +6502,42 @@ def check_card_consistency(self): # check that only quark/gluon/photon are in initial beam if lpp=+-1 pdg_in_p = list(range(-6,7))+[21,22] - if (abs(self.run_card['lpp1'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial1'])) \ + if isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int): + if(abs(self.run_card['lpp1'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial1'])) \ or (abs(self.run_card['lpp2'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial2'])): - if 'pythia_card.dat' in self.cards or 'pythia8_card.dat' in self.cards: - path_to_remove = None - if 'pythia_card.dat' in self.cards: - path_to_remove = self.paths['pythia'] - card_to_remove = 'pythia_card.dat' - elif 'pythia8_card.dat' in self.cards: - path_to_remove = self.paths['pythia8'] - card_to_remove = 'pythia8_card.dat' - if path_to_remove: - if 'partonshower' in self.run_card['bypass_check']: + if 'pythia_card.dat' in self.cards or 'pythia8_card.dat' in self.cards: + path_to_remove = None + if 'pythia_card.dat' in self.cards: + path_to_remove = self.paths['pythia'] + card_to_remove = 'pythia_card.dat' + elif 'pythia8_card.dat' in self.cards: + path_to_remove = self.paths['pythia8'] + card_to_remove = 'pythia8_card.dat' + if path_to_remove: + if 'partonshower' in self.run_card['bypass_check']: + logger.warning("forcing to keep parton-shower run while possibly not fully consistent... 
please be carefull") + else: + logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.') + os.remove(path_to_remove) + self.cards.remove(card_to_remove) + else: + logger.info('Remember that Parton-Shower are not yet ready for such proton component definition (HW implementation in progress).', '$MG:BOLD' ) + elif (abs(self.run_card['lpp1'])==3 and abs(self.run_card['lpp2'])==3): + if 'pythia8_card.dat' in self.cards: + if self.run_card['pdlabel'] == 'isronlyll': + if 'partonshower' not in self.run_card['bypass_check']: + # force that QED shower is on? + for param in ['TimeShower:QEDshowerByQ', 'TimeShower:QEDshowerByL', 'TimeShower:QEDshowerByGamma', 'SpaceShower:QEDshowerByQ', 'SpaceShower:QEDshowerByL']: + if param not in self.PY8Card or \ + (not self.PY8Card[param] and param.lower() not in self.PY8Card.user_set): + logger.warning('Activating QED shower: setting %s to True', param) + self.PY8Card[param] = True + elif 'partonshower' in self.run_card['bypass_check']: logger.warning("forcing to keep parton-shower run while possibly not fully consistent... please be carefull") - else: + else: logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.') - os.remove(path_to_remove) - self.cards.remove(card_to_remove) - else: - logger.info('Remember that Parton-Shower are not yet ready for such proton component definition (HW implementation in progress).', '$MG:BOLD' ) - elif (abs(self.run_card['lpp1'])==3 and abs(self.run_card['lpp2'])==3): - if 'pythia8_card.dat' in self.cards: - if self.run_card['pdlabel'] == 'isronlyll': - if 'partonshower' not in self.run_card['bypass_check']: - # force that QED shower is on? - for param in ['TimeShower:QEDshowerByQ', 'TimeShower:QEDshowerByL', 'TimeShower:QEDshowerByGamma', 'SpaceShower:QEDshowerByQ', 'SpaceShower:QEDshowerByL']: - if param not in self.PY8Card or \ - (not self.PY8Card[param] and param.lower() not in self.PY8Card.user_set): - logger.warning('Activating QED shower: setting %s to True', param) - self.PY8Card[param] = True - elif 'partonshower' in self.run_card['bypass_check']: - logger.warning("forcing to keep parton-shower run while possibly not fully consistent... please be carefull") - else: - logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.') - os.remove(self.paths['pythia8']) - self.cards.remove('pythia8_card.dat') + os.remove(self.paths['pythia8']) + self.cards.remove('pythia8_card.dat') ######################################################################## @@ -6514,7 +6614,8 @@ def check_card_consistency(self): #check relation between lepton PDF // dressed lepton collisions // ... 
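The isinstance(..., int) guards threaded through these consistency checks are what keeps them safe under the new run_card scans: until the iterator assigns a concrete point, an entry such as lpp1 can still hold its raw 'scan' string, and abs() on it would raise. A small illustration, with a plain dict standing in for the run_card:

    run_card = {'lpp1': 'scan: [1, 3]', 'lpp2': 1}    # value before iteration

    try:
        abs(run_card['lpp1'])                         # TypeError on the raw tag
    except TypeError:
        symmetric = None                              # defer the check

    if isinstance(run_card['lpp1'], int) and isinstance(run_card['lpp2'], int):
        symmetric = abs(run_card['lpp1']) == 1 == abs(run_card['lpp2'])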
- if abs(self.run_card['lpp1']) != 1 or abs(self.run_card['lpp2']) != 1: + if isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int) and \ + abs(self.run_card['lpp1']) != 1 or abs(self.run_card['lpp2']) != 1: if abs(self.run_card['lpp1']) == abs(self.run_card['lpp2']) == 3: # this can be dressed lepton or photon-flux if proc_charac['pdg_initial1'] in [[11],[-11]] and proc_charac['pdg_initial2'] in [[11],[-11]]: @@ -6732,7 +6833,11 @@ def write_card_param(self): """ write the param_card """ self.param_card.write(self.paths['param']) - + + def write_card_fo_card(self): + """ write the fo_card""" + self.fo_card.write_card_from_template(self.paths['FO_analyse'], self.paths['FO_analyse_default']) + @staticmethod def update_dependent(mecmd, me_dir, param_card, path ,timer=0, run_card=None, lhapdfconfig=None): @@ -7076,7 +7181,7 @@ def do_decay(self, line): #first find the particle particle = line.split('>')[0].strip() logger.info("change madspin_card to define the decay of %s: %s" %(particle, line.strip()), '$MG:BOLD') - particle = particle.replace('+','\+').replace('-','\-') + particle = particle.replace('+',r'\+').replace('-',r'\-') decay_pattern = re.compile(r"^\s*decay\s+%s\s*>[\s\w+-~]*?$" % particle, re.I+re.M) text= open(path).read() text = decay_pattern.sub('', text) @@ -7193,7 +7298,7 @@ def help_edit(self, prefix=True): logger.info( ' --clean remove all previously existing line in the file') logger.info( ' --comment_line="" comment all lines matching the regular expression') logger.info('') - logger.info(' Note: all regular-expression will be prefixed by ^\s*') + logger.info(r' Note: all regular-expression will be prefixed by ^\s*') logger.info('') logger.info( ' example: edit reweight --after_line="change mode\b" change model heft') logger.info( ' edit madspin --after_line="banner" change model XXXX') @@ -7314,7 +7419,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern=r'''replace_line=(?P["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[14:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[14:-1] for posline,l in enumerate(split): if re.search(pattern, l): break @@ -7344,7 +7449,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern=r'''comment_line=(?P["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[14:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[14:-1] nb_mod = 0 for posline,l in enumerate(split): if re.search(pattern, l): @@ -7366,7 +7471,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern=r'''before_line=(?P["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[13:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[13:-1] for posline,l in enumerate(split): if re.search(pattern, l): break @@ -7383,7 +7488,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern = r'''after_line=(?P["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[12:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[12:-1] for posline,l in enumerate(split): if re.search(pattern, l): break @@ -7527,16 +7632,19 @@ def open_file(self, answer): answer = 'plot' else: answer = self.cards[int(answer)-self.integer_bias] - + path = '' if 'madweight' in answer: answer = answer.replace('madweight', 'MadWeight') elif 
'MadLoopParams' in answer: answer = self.paths['ML'] elif 'pythia8_card' in answer: answer = self.paths['pythia8'] + elif 'FO_analyse' in answer: + path = self.paths['FO_analyse'] + answer = 'fo_card' if os.path.exists(answer): path = answer - else: + elif not os.path.exists(path): if not '.dat' in answer and not '.lhco' in answer: if answer != 'trigger': path = self.paths[answer] @@ -7595,7 +7703,8 @@ def reload_card(self, path): logger.error('Please re-open the file and fix the problem.') logger.warning('using the \'set\' command without opening the file will discard all your manual change') elif path == self.paths['run']: - self.run_card = banner_mod.RunCard(path) + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + self.run_card = banner_mod.RunCard(path) elif path == self.paths['shower']: self.shower_card = shower_card_mod.ShowerCard(path) elif path == self.paths['ML']: @@ -7614,6 +7723,8 @@ def reload_card(self, path): except: import internal.madweight.Cards as mwcards self.mw_card = mwcards.Card(path) + elif path == self.paths['FO_analyse']: + self.fo_card = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse']) else: logger.debug('not keep in sync: %s', path) return path @@ -7629,6 +7740,9 @@ def scanparamcardhandling(input_path=lambda obj: pjoin(obj.me_dir, 'Cards', 'par iteratorclass=param_card_mod.ParamCardIterator, summaryorder=lambda obj: lambda:None, check_card=lambda obj: CommonRunCmd.static_check_param_card, + run_card_scan=False, + run_card_input= lambda obj: pjoin(obj.me_dir, 'Cards', 'run_card.dat'), + run_card_iteratorclass=banner_mod.RunCardIterator, ): """ This is a decorator for customizing/using scan over the param_card (or technically other) This should be use like this: @@ -7678,7 +7792,60 @@ def __enter__(self): def __exit__(self, ctype, value, traceback ): self.iterator.write(self.path) - def decorator(original_fct): + def scan_over_run_card(original_fct, obj, *args, **opts): + + if isinstance(input_path, str): + card_path = run_card_input + else: + card_path = run_card_input(obj) + + run_card_iterator = run_card_iteratorclass(card_path) + orig_card = copy.deepcopy(run_card_iterator.run_card) + if not run_card_iterator.run_card.scan_set: + return original_fct(obj, *args, **opts) + + + with restore_iterator(orig_card, card_path): + # this with statement ensure that the original card is restore + # whatever happens inside those block + + if not hasattr(obj, 'allow_notification_center'): + obj.allow_notification_center = False + with misc.TMP_variable(obj, 'allow_notification_center', False): + orig_name = get_run_name(obj) + if not orig_name and args[1]: + orig_name = args[1][0] + args = (args[0], args[1][1:]) + #orig_name = "scan_%s" % len(obj.results) + + try: + os.mkdir(pjoin(obj.me_dir, 'Events', orig_name)) + except Exception: + pass + next_name = orig_name + "_00" + + for i,card in enumerate(run_card_iterator): + card.write(card_path) + # still have to check for the auto-wdith + #if i !=0: + next_name = run_card_iterator.get_next_name(next_name) + set_run_name(obj)(next_name) + try: + original_fct(obj, *args, **opts) + except ignoreerror as error: + run_card_iterator.store_entry(next_name, {'exception': error}) + else: + run_card_iterator.store_entry(next_name, store_for_scan(obj)(), run_card_path=card_path) + + #param_card_iterator.write(card_path) #-> this is done by the with statement + name = misc.get_scan_name(orig_name, next_name) + path = result_path(obj) % name + logger.info("write scan results in %s" % path ,'$MG:BOLD') + order = 
summaryorder(obj)() + run_card_iterator.write_summary(path, order=order) + + + def decorator(original_fct): def new_fct(obj, *args, **opts): if isinstance(input_path, str): @@ -7702,8 +7869,13 @@ def new_fct(obj, *args, **opts): if not param_card_iterator: #first run of the function - original_fct(obj, *args, **opts) - return + if run_card_scan: + scan_over_run_card(original_fct, obj, *args, **opts) + return + else: + #first run of the function + original_fct(obj, *args, **opts) + return with restore_iterator(param_card_iterator, card_path): # this with statement ensure that the original card is restore diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/extended_cmd.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/extended_cmd.py index 2f37070580..789976beee 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/extended_cmd.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/extended_cmd.py @@ -624,12 +624,12 @@ def complete(self, text, state): compfunc = self.completenames # correct wrong splittion with '\ ' - if line and begidx > 2 and line[begidx-2:begidx] == '\ ': + if line and begidx > 2 and line[begidx-2:begidx] == r'\ ': Ntext = line.split(os.path.sep)[-1] - self.completion_prefix = Ntext.rsplit('\ ', 1)[0] + '\ ' + self.completion_prefix = Ntext.rsplit(r'\ ', 1)[0] + r'\ ' to_rm = len(self.completion_prefix) - 1 Nbegidx = len(line.rsplit(os.path.sep, 1)[0]) + 1 - data = compfunc(Ntext.replace('\ ', ' '), line, Nbegidx, endidx) + data = compfunc(Ntext.replace(r'\ ', ' '), line, Nbegidx, endidx) self.completion_matches = [p[to_rm:] for p in data if len(p)>to_rm] # correct wrong splitting with '-'/"=" @@ -742,7 +742,7 @@ def path_completion(text, base_dir = None, only_dirs = False, completion += [prefix + f for f in ['.'+os.path.sep, '..'+os.path.sep] if \ f.startswith(text) and not prefix.startswith('.')] - completion = [a.replace(' ','\ ') for a in completion] + completion = [a.replace(' ',r'\ ') for a in completion] return completion @@ -1253,7 +1253,7 @@ def check_answer_in_input_file(self, question_instance, default, path=False, lin return possibility[0] if '=' in line and ' ' in line.strip(): leninit = len(line) - line,n = re.subn('\s*=\s*','=', line) + line,n = re.subn(r'\s*=\s*','=', line) if n and len(line) != leninit: return self.check_answer_in_input_file(question_instance, default, path=path, line=line) @@ -1311,7 +1311,7 @@ def nice_error_handling(self, error, line): if os.path.exists(self.debug_output): os.remove(self.debug_output) try: - super(Cmd,self).onecmd('history %s' % self.debug_output.replace(' ', '\ ')) + super(Cmd,self).onecmd('history %s' % self.debug_output.replace(' ', r'\ ')) except Exception as error: logger.error(error) @@ -2001,6 +2001,7 @@ def write_configuration(self, filepath, basefile, basedir, to_keep): text = "" has_mg5_path = False # Use local configuration => Need to update the path + already_written = set() for line in open(basefile): if '=' in line: data, value = line.split('=',1) @@ -2018,9 +2019,12 @@ def write_configuration(self, filepath, basefile, basedir, to_keep): comment = '' if key in to_keep: value = str(to_keep[key]) - else: + elif line not in already_written: + already_written.add(line) text += line continue + else: + continue if key == 'mg5_path': has_mg5_path = True try: @@ -2032,14 +2036,20 @@ def write_configuration(self, filepath, basefile, basedir, to_keep): # check if absolute path if not os.path.isabs(value): value = os.path.realpath(os.path.join(basedir, value)) - text += '%s = %s # %s \n' % (key, value, comment) + new_line = 
'%s = %s # %s \n' % (key, value, comment) + if new_line not in already_written: + text += new_line + already_written.add(new_line) for key in to_write: if key in to_keep: - text += '%s = %s \n' % (key, to_keep[key]) + new_line = '%s = %s \n' % (key, to_keep[key]) + if new_line not in already_written: + text += new_line if not MADEVENT and not has_mg5_path: - text += """\n# MG5 MAIN DIRECTORY\n""" - text += "mg5_path = %s\n" % MG5DIR + if "mg5_path = %s\n" % MG5DIR not in already_written: + text += """\n# MG5 MAIN DIRECTORY\n""" + text += "mg5_path = %s\n" % MG5DIR writer = open(filepath,'w') writer.write(text) @@ -2190,7 +2200,7 @@ def onecmd(self, line, **opt): raise def reask(self, reprint_opt=True): - pat = re.compile('\[(\d*)s to answer\]') + pat = re.compile(r'\[(\d*)s to answer\]') prev_timer = signal.alarm(0) # avoid timer if any if prev_timer: @@ -2991,7 +3001,7 @@ def question_formatting(self, nb_col = 80, lpotential_switch=0, lnb_key=0, key=None): - """should return four lines: + r"""should return four lines: 1. The upper band (typically /========\ 2. The lower band (typically \========/ 3. The line without conflict | %(nb)2d. %(descrip)-20s %(name)5s = %(switch)-10s | @@ -3239,13 +3249,13 @@ def create_question(self, help_text=True): data_to_format['conflict_switch'] = self.color_for_value(key,self.inconsistent_keys[key], consistency=False) if hidden_line: - f2 = re.sub('%(\((?:name|descrip|add_info)\)-?)(\d+)s', + f2 = re.sub(r'%(\((?:name|descrip|add_info)\)-?)(\d+)s', lambda x: '%%%s%ds' % (x.group(1),int(x.group(2))+9), f2) text.append(f2 % data_to_format) elif hidden_line: if not f3: - f3 = re.sub('%(\((?:name|descrip|add_info)\)-?)(\d+)s', + f3 = re.sub(r'%(\((?:name|descrip|add_info)\)-?)(\d+)s', lambda x: '%%%s%ds' % (x.group(1),int(x.group(2))+9), f1) text.append(f3 % data_to_format) diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/file_writers.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/file_writers.py index 41bff05276..526756129f 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/file_writers.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/file_writers.py @@ -36,10 +36,10 @@ class FileWriter(io.FileIO): supported_preprocessor_commands = ['if'] preprocessor_command_re=re.compile( - "\s*(?P%s)\s*\(\s*(?P.*)\s*\)\s*{\s*"\ + r"\s*(?P%s)\s*\(\s*(?P.*)\s*\)\s*{\s*"\ %('|'.join(supported_preprocessor_commands))) preprocessor_endif_re=re.compile(\ - "\s*}\s*(?Pelse)?\s*(\((?P.*)\))?\s*(?P{)?\s*") + r"\s*}\s*(?Pelse)?\s*(\((?P.*)\))?\s*(?P{)?\s*") class FileWriterError(IOError): """Exception raised if an error occurs in the definition @@ -191,15 +191,15 @@ class FortranWriterError(FileWriter.FileWriterError): pass # Parameters defining the output of the Fortran writer - keyword_pairs = {'^if.+then\s*$': ('^endif', 2), - '^type(?!\s*\()\s*.+\s*$': ('^endtype', 2), - '^do(?!\s+\d+)\s+': ('^enddo\s*$', 2), - '^subroutine': ('^end\s*$', 0), - '^module': ('^end\s*$', 0), - 'function': ('^end\s*$', 0)} - single_indents = {'^else\s*$':-2, - '^else\s*if.+then\s*$':-2} - number_re = re.compile('^(?P\d+)\s+(?P.*)') + keyword_pairs = {r'^if.+then\s*$': ('^endif', 2), + r'^type(?!\s*\()\s*.+\s*$': ('^endtype', 2), + r'^do(?!\s+\d+)\s+': (r'^enddo\s*$', 2), + '^subroutine': (r'^end\s*$', 0), + '^module': (r'^end\s*$', 0), + 'function': (r'^end\s*$', 0)} + single_indents = {r'^else\s*$':-2, + r'^else\s*if.+then\s*$':-2} + number_re = re.compile(r'^(?P\d+)\s+(?P.*)') line_cont_char = '$' comment_char = 'c' uniformcase = True #force everyting to be lower/upper case @@ -212,7 +212,7 @@ 
class FortranWriterError(FileWriter.FileWriterError): # Private variables __indent = 0 __keyword_list = [] - __comment_pattern = re.compile(r"^(\s*#|c$|(c\s+([^=]|$))|cf2py|c\-\-|c\*\*|!)", re.IGNORECASE) + __comment_pattern = re.compile(r"^(\s*#|c\$|c$|(c\s+([^=]|$))|cf2py|c\-\-|c\*\*|\s*!|!\$)", re.IGNORECASE) __continuation_line = re.compile(r"(?: )[$&]") def write_line(self, line): @@ -424,26 +424,20 @@ def count_number_of_quotes(self, line): i = i + 1 return len(splitline)-1 - def remove_routine(self, text, fct_names, formatting=True): - """write the incoming text but fully removing the associate routine/function - text can be a path to a file, an iterator, a string - fct_names should be a list of functions to remove + @staticmethod + def get_routine(text, fct_names, call_back=None): + """ + get the fortran function from a fortran file """ - f77_type = ['real*8', 'integer', 'double precision', 'logical'] - pattern = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ + pattern = re.compile(r'^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ % {'type':'|'.join(f77_type)}, re.I) - + + if isinstance(text, str): + text = text.split('\n') + + to_write=False removed = [] - if isinstance(text, str): - if '\n' in text: - text = text.split('\n') - else: - text = open(text) - if isinstance(fct_names, str): - fct_names = [fct_names] - - to_write=True for line in text: fct = pattern.findall(line) if fct: @@ -451,22 +445,38 @@ def remove_routine(self, text, fct_names, formatting=True): to_write = False else: to_write = True - if to_write: - if formatting: - if line.endswith('\n'): - line = line[:-1] - self.writelines(line) - else: - if not line.endswith('\n'): - line = '%s\n' % line - super(FileWriter,self).writelines(line) + if call_back: + call_back(line) else: removed.append(line) - + return removed + + def remove_routine(self, text, fct_names, formatting=True): + """write the incoming text but fully removing the associate routine/function + text can be a path to a file, an iterator, a string + fct_names should be a list of functions to remove + """ + + def call_back(line): + if formatting: + if line.endswith('\n'): + line = line[:-1] + self.writelines(line) + else: + if not line.endswith('\n'): + line = '%s\n' % line + super(FileWriter,self).writelines(line) + + return self.get_routine(text, fct_names, call_back) + +class FortranWriter90(FortranWriter): + + comment_char = ' !' 
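The remove_routine/get_routine split above is a callback refactor: get_routine makes the single pass over the Fortran source, feeding every kept line to an optional call_back and returning the lines that belong to the named routines, while remove_routine now just supplies a call_back that writes. A reduced sketch of the pattern (a stand-in only: the real code keys on the SUBROUTINE/function regex, and note that it still needs the f77_type list in scope for that pattern):

    def get_routine(lines, fct_names, call_back=None):
        """Return the lines of the named routines; stream the rest to call_back."""
        removed, keep = [], True
        for line in lines:
            if line.startswith('subroutine'):        # stand-in for the f77 regex
                keep = line.split()[1] not in fct_names
            if keep:
                if call_back:
                    call_back(line)
            else:
                removed.append(line)
        return removed

    kept = []
    src = ['subroutine a', 'x = 1', 'subroutine b', 'y = 2']
    assert get_routine(src, ['b'], kept.append) == ['subroutine b', 'y = 2']
    assert kept == ['subroutine a', 'x = 1']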
+ #=============================================================================== # CPPWriter #=============================================================================== @@ -497,50 +507,50 @@ class CPPWriterError(FileWriter.FileWriterError): '^private': standard_indent, '^protected': standard_indent} - spacing_patterns = [ - ('\s*\"\s*}', '\"'), - ('\s*,\s*', ', '), - ('\s*-\s*', ' - '), - ('([{(,=])\s*-\s*', '\g<1> -'), - ('(return)\s*-\s*', '\g<1> -'), - ('\s*\+\s*', ' + '), - ('([{(,=])\s*\+\s*', '\g<1> +'), - ('\(\s*', '('), - ('\s*\)', ')'), - ('\{\s*', '{'), - ('\s*\}', '}'), - ('\s*=\s*', ' = '), - ('\s*>\s*', ' > '), - ('\s*<\s*', ' < '), - ('\s*!\s*', ' !'), - ('\s*/\s*', '/'), - ('(?\s+>\s*', ' >> '), - ('<\s*double\s*>>\s*', ' > '), - ('\s*<\s+<\s*', ' << '), - ('\s*-\s+>\s*', '->'), - ('\s*=\s+=\s*', ' == '), - ('\s*!\s+=\s*', ' != '), - ('\s*>\s+=\s*', ' >= '), - ('\s*<\s+=\s*', ' <= '), - ('\s*&&\s*', ' && '), - ('\s*\|\|\s*', ' || '), - ('\s*{\s*}', ' {}'), - ('\s*;\s*', '; '), - (';\s*\}', ';}'), - (';\s*$}', ';'), - ('\s*<\s*([a-zA-Z0-9]+?)\s*>', '<\g<1>>'), - ('^#include\s*<\s*(.*?)\s*>', '#include <\g<1>>'), - ('(\d+\.{0,1}\d*|\.\d+)\s*[eE]\s*([+-]{0,1})\s*(\d+)', - '\g<1>e\g<2>\g<3>'), - ('\s+',' '), - ('^\s*#','#')] + spacing_patterns = [(r'\s*\"\s*}', '\"'), + (r'\s*,\s*', ', '), + (r'\s*-\s*', ' - '), + (r'([{(,=])\s*-\s*', r'\g<1> -'), + (r'(return)\s*-\s*', r'\g<1> -'), + (r'\s*\+\s*', ' + '), + (r'([{(,=])\s*\+\s*', r'\g<1> +'), + (r'\(\s*', '('), + (r'\s*\)', ')'), + (r'\{\s*', '{'), + (r'\s*\}', '}'), + (r'\s*=\s*', ' = '), + (r'\s*>\s*', ' > '), + (r'\s*<\s*', ' < '), + (r'\s*!\s*', ' !'), + (r'\s*/\s*', '/'), + (r'\s*\*\s*', ' * '), + (r'\s*-\s+-\s*', '-- '), + (r'\s*\+\s+\+\s*', '++ '), + (r'\s*-\s+=\s*', ' -= '), + (r'\s*\+\s+=\s*', ' += '), + (r'\s*\*\s+=\s*', ' *= '), + (r'\s*/=\s*', ' /= '), + (r'\s*>\s+>\s*', ' >> '), + (r'<\s*double\s*>>\s*', ' > '), + (r'\s*<\s+<\s*', ' << '), + (r'\s*-\s+>\s*', '->'), + (r'\s*=\s+=\s*', ' == '), + (r'\s*!\s+=\s*', ' != '), + (r'\s*>\s+=\s*', ' >= '), + (r'\s*<\s+=\s*', ' <= '), + (r'\s*&&\s*', ' && '), + (r'\s*\|\|\s*', ' || '), + (r'\s*{\s*}', ' {}'), + (r'\s*;\s*', '; '), + (r';\s*\}', ';}'), + (r';\s*$}', ';'), + (r'\s*<\s*([a-zA-Z0-9]+?)\s*>', r'<\g<1>>'), + (r'^#include\s*<\s*(.*?)\s*>', r'#include <\g<1>>'), + (r'(\d+\.{0,1}\d*|\.\d+)\s*[eE]\s*([+-]{0,1})\s*(\d+)', + r'\g<1>e\g<2>\g<3>'), + (r'\s+',' '), + (r'^\s*#','#')] + spacing_re = dict([(key[0], re.compile(key[0])) for key in \ spacing_patterns]) diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/gen_crossxhtml.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/gen_crossxhtml.py index d58ec573bc..681bf9d09b 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/gen_crossxhtml.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/gen_crossxhtml.py @@ -648,7 +648,7 @@ def recreate(self, banner): if run_card['ickkw'] != 0: #parse the file to have back the information pythia_log = misc.BackRead(pjoin(path, '%s_pythia.log' % tag)) - pythiare = re.compile("\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") + pythiare = re.compile(r"\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") for line in pythia_log: info = pythiare.search(line) if not info: @@ -1623,7 +1623,7 @@ def get_html(self, runresults): elif self.debug: text = str(self.debug).replace('. ','.
') if 'http' in text: - pat = re.compile('(http[\S]*)') + pat = re.compile(r'(http[\S]*)') text = pat.sub(r' here ', text) debug = '%s %s' % \ (self.debug.__class__.__name__, text) diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/gen_ximprove.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/gen_ximprove.py index c86da36a05..415ecc9de0 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/gen_ximprove.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/gen_ximprove.py @@ -158,8 +158,11 @@ def get_helicity(self, to_submit=True, clean=True): (stdout, _) = p.communicate(''.encode()) stdout = stdout.decode('ascii',errors='ignore') if stdout: - nb_channel = max([math.floor(float(d)) for d in stdout.split()]) + lines = stdout.strip().split('\n') + nb_channel = max([math.floor(float(d)) for d in lines[-1].split()]) else: + if os.path.exists(pjoin(self.me_dir, 'error')): + os.remove(pjoin(self.me_dir, 'error')) for matrix_file in misc.glob('matrix*orig.f', Pdir): files.cp(matrix_file, matrix_file.replace('orig','optim')) P_zero_result.append(Pdir) @@ -297,12 +300,14 @@ def get_helicity(self, to_submit=True, clean=True): bad_amps_perhel = [] if __debug__: mtext = open(matrix_file).read() - nb_amp = int(re.findall('PARAMETER \(NGRAPHS=(\d+)\)', mtext)[0]) - logger.debug('nb_hel: %s zero amp: %s bad_amps_hel: %s/%s', len(good_hels),len(bad_amps),len(bad_amps_perhel), len(good_hels)*nb_amp ) + nb_amp = int(re.findall(r'PARAMETER \(NGRAPHS=(\d+)\)', mtext)[0]) + logger.debug('(%s) nb_hel: %s zero amp: %s bad_amps_hel: %s/%s', split_file[-1], len(good_hels),len(bad_amps),len(bad_amps_perhel), len(good_hels)*nb_amp ) if len(good_hels) == 1: files.cp(matrix_file, matrix_file.replace('orig','optim')) continue # avoid optimization if only one helicity - recycler = hel_recycle.HelicityRecycler(good_hels, bad_amps, bad_amps_perhel) + + gauge = self.cmd.proc_characteristics['gauge'] + recycler = hel_recycle.HelicityRecycler(good_hels, bad_amps, bad_amps_perhel, gauge=gauge) # In case of bugs you can play around with these: recycler.hel_filt = self.run_card['hel_filtering'] recycler.amp_splt = self.run_card['hel_splitamp'] @@ -1299,7 +1304,7 @@ def get_job_for_event(self): 'script_name': 'unknown', 'directory': C.name, # needs to be changed for split jobs 'P_dir': C.parent_name, - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # needed for RO gridpack 'offset': 1, # needs to be changed for split jobs 'nevents': nevents, 'maxiter': self.max_iter, @@ -1387,7 +1392,7 @@ def create_ajob(self, template, jobs, write_dir=None): break info = jobs[j] info['script_name'] = 'ajob%i' % script_number - info['keeplog'] = 'false' + info['keeplog'] = 'false' if self.run_card['keep_log'] != 'debug' else 'true' if "base_directory" not in info: info["base_directory"] = "./" fsock.write(template_text % info) @@ -1456,7 +1461,7 @@ def get_job_for_precision(self): 'script_name': 'unknown', 'directory': C.name, # needs to be changed for split jobs 'P_dir': C.parent_name, - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # used for RO gridpack 'offset': 1, # needs to be changed for split jobs 'nevents': nevents, 'maxiter': self.max_iter, @@ -1916,7 +1921,7 @@ def get_job_for_event(self): 'directory': C.name, # needs to be changed for split jobs 'P_dir': os.path.basename(C.parent_name), 'offset': 1, # needs to be changed for split jobs - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # used for RO gridpack 'nevents': nevents,
#int(nevents*self.gen_events_security)+1, 'maxiter': self.max_iter, 'miniter': self.min_iter, diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/hel_recycle.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/hel_recycle.py index dfa45d5d20..6c86611f68 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/hel_recycle.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/hel_recycle.py @@ -383,7 +383,7 @@ def get_number(cls, *args): class HelicityRecycler(): '''Class for recycling helicity''' - def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[]): + def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[], gauge='U'): External.good_hel = [] External.nhel_lines = '' @@ -427,6 +427,7 @@ def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[]): self.all_hel = [] self.hel_filt = True + self.gauge = gauge def set_input(self, file): if 'born_matrix' in file: @@ -612,7 +613,7 @@ def unfold_helicities(self, line, nature): def apply_amps(self, line, new_objs): if self.amp_splt: - return split_amps(line, new_objs) + return split_amps(line, new_objs, gauge=self.gauge) else: return apply_args(line, [i.args for i in new_objs]) @@ -785,7 +786,7 @@ def apply_args(old_line, all_the_args): return ''.join(new_lines) -def split_amps(line, new_amps): +def split_amps(line, new_amps, gauge): if not new_amps: return '' fct = line.split('(',1)[0].split('_0')[0] @@ -841,34 +842,31 @@ def split_amps(line, new_amps): spin = fct.split(None,1)[1][to_remove] lines.append('%sP1N_%s(%s)' % (fct, to_remove+1, ', '.join(args))) - hel, iamp = re.findall('AMP\((\d+),(\d+)\)', amp_result)[0] + hel, iamp = re.findall(r'AMP\((\d+),(\d+)\)', amp_result)[0] hel_calculated.append(hel) #lines.append(' %(result)s = TMP(3) * W(3,%(w)s) + TMP(4) * W(4,%(w)s)+' # % {'result': amp_result, 'w': windex}) #lines.append(' & TMP(5) * W(5,%(w)s)+TMP(6) * W(6,%(w)s)' # % {'result': amp_result, 'w': windex}) - if spin in "VF": - lines.append(""" call CombineAmp(%(nb)i, - & (/%(hel_list)s/), - & (/%(w_list)s/), - & TMP, W, AMP(1,%(iamp)s))""" % - {'nb': len(sub_amps), - 'hel_list': ','.join(hel_calculated), - 'w_list': ','.join(windices), - 'iamp': iamp - }) + if spin == "F" or ( spin == "V" and gauge !='FD'): + suffix = '' elif spin == "S": - lines.append(""" call CombineAmpS(%(nb)i, - &(/%(hel_list)s/), - & (/%(w_list)s/), - & TMP, W, AMP(1,%(iamp)s))""" % - {'nb': len(sub_amps), - 'hel_list': ','.join(hel_calculated), - 'w_list': ','.join(windices), - 'iamp': iamp - }) + suffix = 'S' + elif spin == "V" and gauge == "FD": + suffix = "FD" else: - raise Exception("split amp are not supported for spin2 and 3/2") + raise Exception("split amp not supported for spin2, 3/2") + + lines.append(""" call CombineAmp%(suffix)s(%(nb)i, + & (/%(hel_list)s/), + & (/%(w_list)s/), + & TMP, W, AMP(1,%(iamp)s))""" % {'suffix':suffix, + 'nb': len(sub_amps), + 'hel_list': ','.join(hel_calculated), + 'w_list': ','.join(windices), + 'iamp': iamp + }) + #lines.append('') return '\n'.join(lines) diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/histograms.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/histograms.py index 98f22c4c3a..9931127f66 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/histograms.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/histograms.py @@ -632,34 +632,34 @@ class HwU(Histogram): # than necessary because the HwU standard allows for spaces from within # the name of a weight weight_header_re = re.compile( - '&\s*(?P(\S|(\s(?!\s*(&|$))))+)(\s(?!(&|$)))*') + r'&\s*(?P(\S|(\s(?!\s*(&|$))))+)(\s(?!(&|$)))*') # 
================================ # Histo weight specification RE's # ================================ # The start of a plot - histo_start_re = re.compile('^\s*\s*(?P\d+)\s*"\s*'+ - '(?P(\S|(\s(?!\s*")))+)\s*"\s*$') + histo_start_re = re.compile(r'^\s*\s*(?P\d+)\s*"\s*'+ + r'(?P(\S|(\s(?!\s*")))+)\s*"\s*$') # A given weight specifier - a_float_re = '[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' - histo_bin_weight_re = re.compile('(?P%s|NaN)'%a_float_re,re.IGNORECASE) - a_int_re = '[\+|-]?\d+' + a_float_re = r'[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' + histo_bin_weight_re = re.compile(r'(?P%s|NaN)'%a_float_re,re.IGNORECASE) + a_int_re = r'[\+|-]?\d+' # The end of a plot histo_end_re = re.compile(r'^\s*<\\histogram>\s*$') # A scale type of weight - weight_label_scale = re.compile('^\s*mur\s*=\s*(?P%s)'%a_float_re+\ - '\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) - weight_label_PDF = re.compile('^\s*PDF\s*=\s*(?P\d+)\s*$') - weight_label_PDF_XML = re.compile('^\s*pdfset\s*=\s*(?P\d+)\s*$') - weight_label_TMS = re.compile('^\s*TMS\s*=\s*(?P%s)\s*$'%a_float_re) - weight_label_alpsfact = re.compile('^\s*alpsfact\s*=\s*(?P%s)\s*$'%a_float_re, + weight_label_scale = re.compile(r'^\s*mur\s*=\s*(?P%s)'%a_float_re+\ + r'\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) + weight_label_PDF = re.compile(r'^\s*PDF\s*=\s*(?P\d+)\s*$') + weight_label_PDF_XML = re.compile(r'^\s*pdfset\s*=\s*(?P\d+)\s*$') + weight_label_TMS = re.compile(r'^\s*TMS\s*=\s*(?P%s)\s*$'%a_float_re) + weight_label_alpsfact = re.compile(r'^\s*alpsfact\s*=\s*(?P%s)\s*$'%a_float_re, re.IGNORECASE) - weight_label_scale_adv = re.compile('^\s*dyn\s*=\s*(?P%s)'%a_int_re+\ - '\s*mur\s*=\s*(?P%s)'%a_float_re+\ - '\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) - weight_label_PDF_adv = re.compile('^\s*PDF\s*=\s*(?P\d+)\s+(?P\S+)\s*$') + weight_label_scale_adv = re.compile(r'^\s*dyn\s*=\s*(?P%s)'%a_int_re+\ + r'\s*mur\s*=\s*(?P%s)'%a_float_re+\ + r'\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) + weight_label_PDF_adv = re.compile(r'^\s*PDF\s*=\s*(?P\d+)\s+(?P\S+)\s*$') class ParseError(MadGraph5Error): @@ -926,7 +926,7 @@ def get_HwU_source(self, print_header=True): res.append(' '.join('%+16.7e'%wgt for wgt in list(bin.boundaries))) res[-1] += ' '.join('%+16.7e'%bin.wgts[key] for key in self.bins.weight_labels if key not in ['central','stat_error']) - res.append('<\histogram>') + res.append(r'<\histogram>') return res def output(self, path=None, format='HwU', print_header=True): @@ -1149,6 +1149,8 @@ def parse_one_histo_from_stream(self, stream, all_weight_header, boundaries = [0.0,0.0] for j, weight in \ enumerate(HwU.histo_bin_weight_re.finditer(line_bin)): + if (j == len(weight_header)): + continue if j == len(all_weight_header): raise HwU.ParseError("There is more bin weights"+\ " specified than expected (%i)"%len(weight_header)) @@ -1803,7 +1805,7 @@ def parse_histos_from_PY8_XML_stream(self, stream, run_id=None, # Filter empty weights coming from the split weight_label_list = [wgt.strip() for wgt in str(selected_run_node.getAttribute('header')).split(';') if - not re.match('^\s*$',wgt)] + not re.match(r'^\s*$',wgt)] ordered_weight_label_list = [w for w in weight_label_list if w not\ in ['xmin','xmax']] # Remove potential repetition of identical weight labels @@ -1827,7 +1829,7 @@ def parse_histos_from_PY8_XML_stream(self, stream, run_id=None, all_weights = [] for wgt_position, wgt_label in \ enumerate(str(selected_run_node.getAttribute('header')).split(';')): - if not re.match('^\s*$',wgt_label) is None: + if not 
re.match(r'^\s*$',wgt_label) is None: continue all_weights.append({'POSITION':wgt_position}) for wgt_item in wgt_label.strip().split('_'): @@ -2714,7 +2716,7 @@ def ratio_no_correlations(wgtsA, wgtsB): # First the global gnuplot header for this histogram group global_header =\ -""" +r""" ################################################################################ ### Rendering of the plot titled '%(title)s' ################################################################################ @@ -2862,9 +2864,9 @@ def ratio_no_correlations(wgtsA, wgtsB): major_title = ', '.join(major_title) if not mu[0] in ['none',None]: - major_title += ', dynamical\_scale\_choice=%s'%mu[0] + major_title += r', dynamical\_scale\_choice=%s'%mu[0] if not pdf[0] in ['none',None]: - major_title += ', PDF=%s'%pdf[0].replace('_','\_') + major_title += ', PDF=%s'%pdf[0].replace('_',r'\_') # Do not show uncertainties for individual jet samples (unless first # or specified explicitely and uniquely) @@ -2937,7 +2939,7 @@ def ratio_no_correlations(wgtsA, wgtsB): plot_lines.append( "'%s' index %d using (($1+$2)/2):%d ls %d title '%s'"\ %(HwU_name,block_position+i,mu_var+3,color_index,\ -'%s dynamical\_scale\_choice=%s' % (title,mu[j]))) +r'%s dynamical\_scale\_choice=%s' % (title,mu[j]))) # And now PDF_variation if available if not PDF_var_pos is None: for j,PDF_var in enumerate(PDF_var_pos): @@ -2947,7 +2949,7 @@ def ratio_no_correlations(wgtsA, wgtsB): plot_lines.append( "'%s' index %d using (($1+$2)/2):%d ls %d title '%s'"\ %(HwU_name,block_position+i,PDF_var+3,color_index,\ -'%s PDF=%s' % (title,pdf[j].replace('_','\_')))) +'%s PDF=%s' % (title,pdf[j].replace('_',r'\_')))) # Now add the uncertainty lines, those not using a band so that they # are not covered by those using a band after we reverse plo_lines diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/launch_plugin.py index 7b10bedcef..9ec09eb71d 100644 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/launch_plugin.py @@ -98,6 +98,7 @@ def default_setup(self): self['vector_size'] = 16 # already setup in default class (just change value) self['aloha_flag'] = '--fast-math' self['matrix_flag'] = '-O3' + self['limhel'] = 0 self.display_block.append('simd') self.display_block.append('psoptim') diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/lhe_parser.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/lhe_parser.py index eee9ba4522..f6e47956cd 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/lhe_parser.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/lhe_parser.py @@ -7,6 +7,7 @@ import numbers import math import time +import copy import os import shutil import sys @@ -101,7 +102,7 @@ def __init__(self, line=None, event=None): self.rwgt = 0 return - + self.event = event if event is not None: self.event_id = len(event) #not yet in the event @@ -1066,6 +1067,8 @@ def define_init_banner(self, wgt, lha_strategy, proc_charac=None): #special case for 1>N init_information = run_card.get_banner_init_information() event = next(self) + if not len(event): #if parse-momenta was false we have to parse the first event + event = Event(str(event)) init_information["idbmup1"] = event[0].pdg init_information["ebmup1"] = event[0].mass init_information["idbmup2"] = 0 @@ -1149,6 +1152,7 @@ def initialize_unweighting(self, getwgt, trunc_error): nb_keep = max(20, int(nb_event*trunc_error*15)) new_wgt = new_wgt[-nb_keep:] if nb_event == 0: + misc.sprint(i,f) raise 
Exception # store the information self.initial_nb_events[i] = nb_event @@ -1203,7 +1207,6 @@ def unweight(self, outputpath, get_wgt, **opts): (stop to write event when target is reached) """ - if isinstance(get_wgt, (str,six.text_type)): unwgt_name =get_wgt def get_wgt_multi(event): @@ -1483,12 +1486,12 @@ def reorder_mother_child(self): particle.mother2 -= 1 # re-call the function for the next potential change return self.reorder_mother_child() - - - - - + + + + + def parse_reweight(self): """Parse the re-weight information in order to return a dictionary {key: value}. If no group is define group should be '' """ @@ -1522,6 +1525,37 @@ def parse_nlo_weight(self, real_type=(1,11), threshold=None): threshold=threshold) return self.nloweight + def get_fks_pair(self, real_type=(1,11), threshold=None): + """ Gives the fks pair labels""" + start, stop = self.tag.find(''), self.tag.find('') + if start != -1 != stop: + text = self.tag[start+8:stop] + all_line = text.split('\n') + text = text.lower().replace('d','e') + all_line = text.split('\n') + for line in all_line: + data = line.split() + if len(data)>16: + wgt = OneNLOWeight(line, real_type=real_type) + return wgt.to_merge_pdg,wgt.nexternal + + def get_born_momenta(self,real_type=(1,11), threshold=None): + """ Gets the underlying n+1 body kinematics""" + start, stop = self.tag.find(''), self.tag.find('') + if start != -1 != stop: + text = self.tag[start+8:stop] + text = text.lower().replace('d','e') + all_line = text.split('\n') + for line in all_line: + data = line.split() + if len(data)>16: + wgt = OneNLOWeight(line, real_type=real_type) + nexternal = wgt.nexternal + real_momenta = all_line[2:2+nexternal] + return real_momenta + + + def rewrite_nlo_weight(self, wgt=None): """get the string associate to the weight""" @@ -1558,11 +1592,11 @@ def parse_lo_weight(self): return self.loweight if not hasattr(Event, 'loweight_pattern'): - Event.loweight_pattern = re.compile('''\s*(?P\d+)\s+(?P[\d.e+-]+)\s*\s*\n\s* - \s*(?P[\s\d.+-e]+)\s*\s*\n\s* - 1|2)["']?\>\s*(?P[\s\d.e+-]*)\s*\s*\n\s* - 1|2)["']?\>\s*(?P[\s\d.e+-]*)\s*\s*\n\s* - \s*(?P[\d.e+-]*)\s* + Event.loweight_pattern = re.compile('''\\s*(?P\\d+)\\s+(?P[\\d.e+-]+)\\s*\\s*\n\\s* + \\s*(?P[\\s\\d.+-e]+)\\s*\\s*\n\\s* + 1|2)["']?\\>\\s*(?P[\\s\\d.e+-]*)\\s*\\s*\n\\s* + 1|2)["']?\\>\\s*(?P[\\s\\d.e+-]*)\\s*\\s*\n\\s* + \\s*(?P[\\d.e+-]*)\\s* ''',re.X+re.I+re.M) start, stop = self.tag.find(''), self.tag.find('') @@ -1615,7 +1649,7 @@ def parse_matching_scale(self): self.matched_scale_data = [] - pattern = re.compile("") + pattern = re.compile(r"") data = re.split(pattern,self.tag) if len(data) == 1: return [] @@ -1623,7 +1657,7 @@ def parse_matching_scale(self): tmp = {} start,content, end = data self.tag = "%s%s" % (start, end) - pattern = re.compile("pt_clust_(\d*)=\"([\de+-.]*)\"") + pattern = re.compile("pt_clust_(\\d*)=\"([\\de+-.]*)\"") for id,value in pattern.findall(content): tmp[int(id)] = float(value) for i in range(1, len(self)+1): @@ -1647,7 +1681,7 @@ def parse_syscalc_info(self): return self.syscalc_data pattern = re.compile("|") - pattern2 = re.compile("<(?P[\w]*)(?:\s*(\w*)=[\"'](.*)[\"']\s*|\s*)>(.*)") + pattern2 = re.compile("<(?P[\\w]*)(?:\\s*(\\w*)=[\"'](.*)[\"']\\s*|\\s*)>(.*)") data = re.split(pattern,self.tag) if len(data) == 1: return [] @@ -1850,6 +1884,240 @@ def get_decay(self, pdg_code=0, event_id=None): return new_event + + def set_initial_mass_to_zero(self): + """set the masses of the initial particles to zero, by reshuffling the respective momenta + Works only in 
the **partonic** center-of-mass frame, so the event must be boosted to such frame + before calling the function + """ + + if not misc.equal(self[0].px, 0) or not misc.equal(self[1].px, 0) or \ + not misc.equal(self[0].py, 0) or not misc.equal(self[1].py, 0) or \ + not misc.equal(self[0].pz, - self[1].pz, zero_limit=False): + misc.sprint(self[0]) + misc.sprint(self[1]) + raise Exception('momenta should be in the partonic center of mass frame') + + self[0].mass = 0. + self[1].mass = 0. + tot_E=0. + for ip,part in enumerate(self): + if part.status == 1 : + tot_E += part.E + if (self[0].pz > 0. and self[1].pz < 0): + self[0].set_momentum(FourMomentum([tot_E/2., 0., 0., tot_E/2.])) + self[1].set_momentum(FourMomentum([tot_E/2., 0., 0., -tot_E/2.])) + elif (self[0].pz < 0. and self[1].pz > 0): + self[0].set_momentum(FourMomentum([tot_E/2., 0., 0., -tot_E/2.])) + self[1].set_momentum(FourMomentum([tot_E/2., 0., 0., tot_E/2.])) + else: + logger.critical('ERROR: two incoming partons not back-to-back') + + def set_final_jet_mass_to_zero(self): + """set the final light particle masses to zero + """ + + for ip,part in enumerate(self): + if ((abs(part.pid) <= 5) or (abs(part.pid) == 11) or (abs(part.pid) == 12)) and (part.status == 1): + part.mass = 0. + E_1_new = math.sqrt(part.mass**2 + part.px**2 + part.py**2 + part.pz**2) + part.set_momentum(FourMomentum([E_1_new, part.px, part.py, part.pz])) + + + + def merge_particles_kinematics(self, i,j, moth): + """Map to an underlying n-body kinematics for two given + particles i,j to be merged and a resulting moth""" + """ note! kinematics (and id) mapping only! """ + + recoil = True + fks_type = False + + if recoil and not fks_type: + if (i == moth[0].get('number')-1): + fks_i = i + fks_j = j + elif (j == moth[0].get('number')-1): + fks_i = j + fks_j = i + to_remove = fks_j + + merge_i = self[fks_i] + merge_j = self[fks_j] + + i_4mom = FourMomentum(merge_i) + j_4mom = FourMomentum(merge_j) + if (fks_i <= 1): + sign1 = -1.0 + else: + sign1 = 1.0 + mother_4mom = i_4mom + sign1*j_4mom + + new_event = copy.deepcopy(self) + + self[fks_i].pid = moth[0]['id'] + self[fks_i].set_momentum(mother_4mom) + + if fks_i <= 1: # initial-state recoil + new_p = FourMomentum() + for ip,part in enumerate(self): + if (ip != fks_i and ip != fks_j and ip >= 2): + new_p += part + + if fks_i == 0: + self[1].set_momentum(new_p - FourMomentum(self[0])) + elif fks_i == 1: + self[0].set_momentum(new_p - FourMomentum(self[1])) + + pz_1_new = 
self.recoil_eq(self[0],self[1]) + pz_2_new = self[0].pz + self[1].pz - pz_1_new + E_1_new = math.sqrt(self[0].mass**2 + self[0].px**2 + self[0].py**2 + pz_1_new **2) + E_2_new = math.sqrt(self[1].mass**2 + self[1].px**2 + self[1].py**2 + pz_2_new **2) + self[0].set_momentum(FourMomentum([E_1_new,self[0].px,self[0].py,pz_1_new])) + self[1].set_momentum(FourMomentum([E_2_new,self[1].px,self[1].py,pz_2_new])) + self.pop(to_remove) + + elif fks_type and not recoil: + ## Do it in a more FKS-style + if (i == moth[0].get('number')-1): + fks_i = i + fks_j = j + elif (j == moth[0].get('number')-1): + fks_i = j + fks_j = i + to_remove = fks_j + new_event = copy.copy(event) + + if fks_i <= 1: # initial-state recoil + + # First boost to partonic CM frame + q = FourMomentum(self[0])+FourMomentum(self[1]) + for ip,part in enumerate(self): + vec = FourMomentum(part) + self[ip].set_momentum(vec.zboost(pboost=q)) + + k_tot = FourMomentum([self[0].E+self[1].E-self[fks_j].E,self[0].px+self[1].px-self[fks_j].px,\ + self[0].py+self[1].py-self[fks_j].py,self[0].pz+self[1].pz-self[fks_j].pz]) + + final = FourMomentum([0,0,0,0]) + for ip,part in enumerate(self): + vec = FourMomentum([part.E,part.px,part.py,part.pz]) + if (ip != fks_i and ip != fks_j and ip >= 2): + final = final + vec + + s = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz])**2 + ksi = self[fks_j].E/(math.sqrt(s)/2.0) + y = self[fks_j].pz/self[fks_j].E + + self[0].pz = self[0].pz * math.sqrt(1.0-ksi)*math.sqrt((2.0-ksi*(1.0+y))/((2.0-ksi*(1.0-y)))) + self[0].E = math.sqrt(self[0].mass**2 + self[0].pz**2) + self[1].pz = self[1].pz * math.sqrt(1.0-ksi)*math.sqrt((2.0-ksi*(1.0-y))/((2.0-ksi*(1.0+y)))) + self[1].E = math.sqrt(self[1].mass**2 + self[1].pz**2) + + final = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz]) + + k_tot_1 = k_tot.zboost(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + k_tot_2 = k_tot_1.pt_boost(pboost=FourMomentum([k_tot_1.E,k_tot_1.px,k_tot_1.py,k_tot_1.pz])) + k_tot_3 = k_tot_2.zboost_inv(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + + for ip,part in enumerate(self): + if (ip >= 2): + vec = FourMomentum([part.E,part.px,part.py,part.pz]) + vec2 = vec.zboost(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + vec3 = vec2.pt_boost(pboost=FourMomentum([k_tot_1.E,k_tot_1.px,k_tot_1.py,k_tot_1.pz])) + vec_new = vec3.zboost_inv(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + self[ip].set_momentum(FourMomentum([vec_new.E,vec_new.px,vec_new.py,vec_new.pz])) + + self.pop(to_remove) + + else: # final-state recoil + q = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz]) + + for ip,part in enumerate(self): + vec = FourMomentum([part.E,part.px,part.py,part.pz]) + self[ip].set_momentum(vec.zboost(pboost=q)) + + q = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz]) + + k = FourMomentum([self[fks_i].E+self[fks_j].E,self[fks_i].px+self[fks_j].px,\ + self[fks_i].py+self[fks_j].py,self[fks_i].pz+self[fks_j].pz]) + + k_rec = FourMomentum([0,0,0,0]) + for ip,part in enumerate(self): + if ip >= 2 and ip != fks_i and ip != fks_j: # add only final-states to the recoil and not the FKS pair + k_rec = k_rec + FourMomentum([part.E,part.px,part.py,part.pz]) + + k_mom = math.sqrt(k_rec.px**2 + k_rec.py**2 + k_rec.pz**2) + beta = (q**2 - (k_rec.E+k_mom)**2)/(q**2 + (k_rec.E+k_mom)**2) + 
for ip,part in enumerate(self): + if ip >= 2 and ip != fks_i and ip != fks_j: + vec = FourMomentum([self[ip].E,self[ip].px,self[ip].py,self[ip].pz]) + self[ip].set_momentum(vec.boost_beta(beta,k_rec)) + if ip == fks_i: + self[ip].set_momentum(q - k_rec.boost_beta(beta,k_rec)) + self.pop(to_remove) + else: + logger.info('Error in Sudakov Born mapping: no recoil scheme found!') + + def recoil_eq(self,part1, part2): + """ In general, solves the equation + E1 + E2 = K + p1 + p2 = c + E1^2 - p1^2 = a + E2^2 - p2^2 = b + and returns p1 + """ + thresh = 1e-6 + import random + a = part1.mass**2 + part1.px**2 + part1.py**2 + b = part2.mass**2 + part2.px**2 + part2.py**2 + c = part1.pz + part2.pz + K = part1.E + part2.E + K2 = K**2 + sol1 = (-a*c + b*c + c**3 - c*K2 - math.sqrt(K2*(a**2 + (b + c**2 - K2)**2 - 2*a*(b - c**2 + K2))))/(2*(c**2-K2)) + sol2 = (-a*c + b*c + c**3 - c*K2 + math.sqrt(K2*(a**2 + (b + c**2 - K2)**2 - 2*a*(b - c**2 + K2))))/(2*(c**2-K2)) + + if abs(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2) - (math.sqrt(a+sol2**2) + math.sqrt(b+(c-sol2)**2))) > thresh: + logger.critical('Error in recoil_eq solver 1') + logger.critical(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2)) + logger.critical(math.sqrt(a+sol2**2) + math.sqrt(b+(c-sol2)**2)) + if abs(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2) - K) > thresh: + logger.critical('Error in recoil_eq solver 2') + logger.critical(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2)) + logger.critical(K) + return sol1 + + def boost(self, filter=None): """modify the current event to boost it according to the current filter""" if filter is None: @@ -1861,7 +2129,7 @@ def boost(self, filter=None): if list(filter(p)): pboost += p else: - pboost = FourMomentum(pboost) + pboost = FourMomentum(filter) # change sign of three-component due to helas convention pboost.px *=-1 @@ -1877,7 +2145,7 @@ def check(self): """check various property of the events""" # check that relative error is under control - threshold = 1e-6 + threshold = 1e-4 #1. Check that the 4-momenta are conserved E, px, py, pz = 0,0,0,0 @@ -1920,7 +2188,50 @@ def check(self): self.check_color_structure() #3. check mass - + + def check_kinematics_only(self): + """check various property of the events - only kinematics""" + + # check that relative error is under control + threshold = 1e-3 + + #1. 
Check that the 4-momenta are conserved + E, px, py, pz = 0,0,0,0 + absE, abspx, abspy, abspz = 0,0,0,0 + for particle in self: + coeff = 1 + if particle.status == -1: + coeff = -1 + elif particle.status != 1: + continue + E += coeff * particle.E + absE += abs(particle.E) + px += coeff * particle.px + py += coeff * particle.py + pz += coeff * particle.pz + abspx += abs(particle.px) + abspy += abs(particle.py) + abspz += abs(particle.pz) + # check mass + fourmass = FourMomentum(particle).mass + + if particle.mass and (abs(particle.mass) - fourmass)/ abs(particle.mass) > threshold: + logger.critical(self) + raise Exception( "Do not have correct mass lhe: %s momentum: %s (error at %s" % (particle.mass, fourmass, (abs(particle.mass) - fourmass)/ abs(particle.mass))) + + if abs(E/absE) > threshold: + logger.critical(self) + raise Exception("Do not conserve Energy %s, %s" % (E/absE, E)) + if abs(px/abspx) > threshold: + logger.critical(self) + raise Exception("Do not conserve Px %s, %s" % (px/abspx, px)) + if abs(py/abspy) > threshold: + logger.critical(self) + raise Exception("Do not conserve Py %s, %s" % (py/abspy, py)) + if abs(pz/abspz) > threshold: + logger.critical(self) + raise Exception("Do not conserve Pz %s, %s" % (pz/abspz, pz)) + def assign_scale_line(self, line, convert=True): """read the line corresponding to global event line @@ -2764,7 +3075,7 @@ def zboost(self, pboost=None, E=0, pz=0): if isinstance(pboost, FourMomentum): E = pboost.E pz = pboost.pz - + #beta = pz/E gamma = E / math.sqrt(E**2-pz**2) gammabeta = pz / math.sqrt(E**2-pz**2) @@ -2778,6 +3089,74 @@ def zboost(self, pboost=None, E=0, pz=0): out.pz = 0 return out + def zboost_inv(self, pboost=None, E=0, pz=0): + """Both momenta should be in the same frame. + The boost perform correspond to the boost required to set pboost at + rest (only z boost applied). + """ + if isinstance(pboost, FourMomentum): + E = pboost.E + pz = pboost.pz + + #beta = pz/E + gamma = E / math.sqrt(E**2-pz**2) + gammabeta = pz / math.sqrt(E**2-pz**2) + + out = FourMomentum([gamma*self.E + gammabeta*self.pz, + self.px, + self.py, + gamma*self.pz + gammabeta*self.E]) + + if abs(out.pz) < 1e-6 * out.E: + out.pz = 0 + return out + + + def pt_boost(self, pboost=None, E=0, pz=0): + """Both momenta should be in the same frame. + The boost perform correspond to the boost required to set pboost at + rest (only pT boost applied). 
+ """ + + if isinstance(pboost, FourMomentum): + E = pboost.E + px = pboost.px + py = pboost.py + mass = math.sqrt(E**2 - px**2 - py**2) + + betax = px/E + betay = py/E + beta = math.sqrt(betax**2+betay**2) + gamma = 1 / math.sqrt(1.0-beta**2) + + out = FourMomentum([gamma*self.E - gamma*betax*self.px - gamma*betay*self.py, + -gamma*betax*self.E + (1.0 + (gamma-1.0)*betax**2/(beta**2))*self.px + (gamma-1.0)*betax*betay/(beta**2)*self.py, + -gamma*betay*self.E + ((gamma-1.0)*betax*betay/(beta**2))*self.px + (1.0+(gamma-1.0)*(betay**2)/(beta**2))*self.py, + self.pz]) + + if abs(out.px) < 1e-6 * out.E: + out.px = 0 + if abs(out.py) < 1e-6 * out.E: + out.py = 0 + return out + + def boost_beta(self,beta,mom): + """ Boost along the three-momentum of mom with a boost of size beta""" + + unit = mom * (1.0/math.sqrt(mom.px**2+mom.py**2+mom.pz**2)) + beta_vec = beta*unit + bx = beta_vec.px + by = beta_vec.py + bz = beta_vec.pz + gamma = 1.0 / math.sqrt(1.0-beta**2) + + out = FourMomentum([gamma*self.E - gamma*bx*self.px - gamma*by*self.py - gamma*bz*self.pz, + -gamma*bx*self.E + (1.0 + (gamma-1.0)*bx**2/(beta**2))*self.px + (gamma-1.0)*bx*by/(beta**2)*self.py + (gamma-1.0)*bx*bz/(beta**2)*self.pz, + -gamma*by*self.E + ((gamma-1.0)*bx*by/(beta**2))*self.px + (1.0+(gamma-1.0)*(by**2)/(beta**2))*self.py + (gamma-1.0)*by*bz/(beta**2)*self.pz, + -gamma*bz*self.E + (gamma-1.0)*bx*bz/(beta**2)*self.px + (gamma-1.0)*(by*bz)/(beta**2)*self.py + (1.0+(gamma-1.0)*bz**2/(beta**2))*self.pz]) + + return out + def boost_to_restframe(self, pboost): """apply the boost transformation such that pboost is at rest in the new frame. First apply a rotation to allign the pboost to the z axis and then use @@ -2789,27 +3168,64 @@ def boost_to_restframe(self, pboost): return out - # write pboost as (E, p cosT sinF, p sinT sinF, p cosF) - # rotation such that it become (E, 0 , 0 , p ) is - # cosT sinF , -sinT , cosT sinF - # sinT cosF , cosT , sinT sinF - # -sinT , 0 , cosF - p = math.sqrt( pboost.px**2 + pboost.py**2+ pboost.pz**2) - cosF = pboost.pz / p - sinF = math.sqrt(1-cosF**2) - sinT = pboost.py/p/sinF - cosT = pboost.px/p/sinF - - out=FourMomentum([self.E, - self.px*cosT*cosF + self.py*sinT*cosF-self.pz*sinF, - -self.px*sinT+ self.py*cosT, - self.px*cosT*sinF + self.py*sinT*sinF + self.pz*cosF - ]) - out = out.zboost(E=pboost.E,pz=p) + # see here https://physics.stackexchange.com/questions/749036/general-lorentz-boost-of-four-momentum-in-cm-frame-particle-physics + vx = pboost.px/pboost.E + vy = pboost.py/pboost.E + vz = pboost.pz/pboost.E + v = pboost.norm/pboost.E + v2 = pboost.norm_sq/pboost.E**2 + gamma = 1./math.sqrt(1.-v**2) + gammo = gamma-1. 
+ out = FourMomentum(E = gamma*(self.E - vx*self.px - vy*self.py - vz*self.pz), + px= -gamma*vx*self.E + (1+gammo*vx**2/v2)*self.px + gammo*vx*vy/v2*self.py + gammo*vx*vz/v2*self.pz, + py= -gamma*vy*self.E + gammo*vy*vx/v2*self.px + (1+gammo*vy**2/v2)*self.py + gammo*vy*vz/v2*self.pz, + pz= -gamma*vz*self.E + gammo*vz*vx/v2*self.px + gammo*vz*vy/v2*self.py + (1+gammo*vz**2/v2)*self.pz) + return out + def rotate_to_z(self,prot): + + import math + import numpy as np + + z = np.array([0.,0.,1.]) + + px = self.px + py = self.py + pz = self.pz + + refx = prot.px + refy = prot.py + refz = prot.pz + + prot_mom = np.array([px, py, pz]) + ref_mom = np.array([refx, refy, refz]) + + # Create normal vector + n = np.array([refy, -refx, 0.]) + n = n * 1./math.sqrt(self.threedot(n,n)) + t = prot_mom - self.threedot(n,prot_mom)*n + p = ref_mom - self.threedot(ref_mom,z)*z + p = p/math.sqrt(self.threedot(p,p)) + + t_pz = np.array([self.threedot(t,p), self.threedot(t,z), 0.]) + costheta = self.threedot(ref_mom,z)* 1./math.sqrt(self.threedot(ref_mom, ref_mom)) + sintheta=math.sqrt(1.-costheta**2) + + sgn = 1. + t_pz_p = np.array([0., 0., 0.]) + t_pz_p[0] = costheta*t_pz[0] + sgn*(-sintheta) * t_pz[1] + t_pz_p[1] = sgn*sintheta*t_pz[0] + costheta * t_pz[1] + + out_mom = self.threedot(n,prot_mom)*n + t_pz_p[0]*p + t_pz_p[1]*z + + out = FourMomentum([self.E,out_mom[0], out_mom[1], out_mom[2] ] ) + + return out - + def threedot(self,a,b): + + return a[0]*b[0]+a[1]*b[1]+a[2]*b[2] class OneNLOWeight(object): diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/madevent_interface.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/madevent_interface.py index 2a118e21bf..8e30cf690c 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/madevent_interface.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/madevent_interface.py @@ -496,7 +496,6 @@ def help_remove(self): logger.info(" the optional '-f' allows to by-pass all security question") logger.info(" The banner can be remove only if all files are removed first.") - class AskRun(cmd.ControlSwitch): """a class for the question on what to do on a madevent run""" @@ -2393,13 +2392,17 @@ def do_generate_events(self, line): # Check argument's validity mode = self.check_generate_events(args) switch_mode = self.ask_run_configuration(mode, args) - if not args: - # No run name assigned -> assigned one automaticaly - self.set_run_name(self.find_available_run_name(self.me_dir), None, 'parton') - else: - self.set_run_name(args[0], None, 'parton', True) - args.pop(0) - + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + run_card = banner_mod.RunCard(pjoin(self.me_dir, 'Cards', 'run_card.dat'), consistency=False) + if not run_card.scan_set: + if not args: + # No run name assigned -> assigned one automaticaly + self.set_run_name(self.find_available_run_name(self.me_dir), None, 'parton') + else: + self.set_run_name(args[0], None, 'parton', True) + args.pop(0) + + self.run_generate_events(switch_mode, args) self.postprocessing() @@ -2560,7 +2563,7 @@ def wait_monitoring(Idle, Running, Done): self.update_status("postprocessing contur done", level="rivet") # this decorator handle the loop related to scan. 
- @common_run.scanparamcardhandling() + @common_run.scanparamcardhandling(run_card_scan=True) def run_generate_events(self, switch_mode, args): if self.proc_characteristics['loop_induced'] and self.options['run_mode']==0: @@ -2593,7 +2596,6 @@ def run_generate_events(self, switch_mode, args): # Regular run mode logger.info('Generating %s events with run name %s' % (self.run_card['nevents'], self.run_name)) - self.exec_cmd('survey %s %s' % (self.run_name,' '.join(args)), postcmd=False) nb_event = self.run_card['nevents'] @@ -2975,7 +2977,7 @@ def update_width_in_param_card(decay_info, initial=None, output=None): particle = 0 # Read BRs for this decay line = param_card[line_number] - while re.search('^(#|\s|\d)', line): + while re.search(r'^(#|\s|\d)', line): line = param_card.pop(line_number) if not particle or line.startswith('#'): line=param_card[line_number] @@ -3226,7 +3228,7 @@ def do_treatcards(self, line, mode=None, opt=None): for line in open(pjoin(bias_module_path,'%s.f'%os.path.basename(bias_module_path))): if start and last: break - if not start and not re.search('c\s*parameters\s*=\s*{',line, re.I): + if not start and not re.search(r'c\s*parameters\s*=\s*{',line, re.I): continue start = True if not line.startswith('C'): @@ -3235,7 +3237,7 @@ def do_treatcards(self, line, mode=None, opt=None): if '{' in line: line = line.split('{')[-1] # split for } ! # - split_result = re.split('(\}|!|\#)', line,1, re.M) + split_result = re.split(r'(\}|!|\#)', line,1, re.M) line = split_result[0] sep = split_result[1] if len(split_result)>1 else None if sep == '}': @@ -3514,8 +3516,8 @@ def pass_in_difficult_integration_mode(self, rate=1): text = open(conf_path).read() min_evt, max_evt = 2500 *(2+rate), 10000*(rate+1) - text = re.sub('''\(min_events = \d+\)''', '(min_events = %i )' % min_evt, text) - text = re.sub('''\(max_events = \d+\)''', '(max_events = %i )' % max_evt, text) + text = re.sub(r'''\(min_events = \d+\)''', '(min_events = %i )' % min_evt, text) + text = re.sub(r'''\(max_events = \d+\)''', '(max_events = %i )' % max_evt, text) fsock = open(conf_path, 'w') fsock.write(text) fsock.close() @@ -3619,7 +3621,7 @@ def do_refine(self, line): alljobs = misc.glob('ajob*', Pdir) #remove associated results.dat (ensure to not mix with all data) - Gre = re.compile("\s*j=(G[\d\.\w]+)") + Gre = re.compile(r"\s*j=(G[\d\.\w]+)") for job in alljobs: Gdirs = Gre.findall(open(job).read()) for Gdir in Gdirs: @@ -3727,58 +3729,126 @@ def do_combine_events(self, line): sum_xsec, sum_xerru, sum_axsec = 0,[],0 Gdirs = self.get_Gdir() Gdirs.sort() - for Gdir in Gdirs: - if os.path.exists(pjoin(Gdir, 'events.lhe')): - result = sum_html.OneResult('') - result.read_results(pjoin(Gdir, 'results.dat')) - sum_xsec += result.get('xsec') - sum_xerru.append(result.get('xerru')) - sum_axsec += result.get('axsec') - - if self.run_card['gridpack'] or self.run_card['nevents']==0: - os.remove(pjoin(Gdir, 'events.lhe')) - continue + partials_info = [] + try: + p = subprocess.Popen(["ulimit", "-n"], stdout=subprocess.PIPE) + out, err = p.communicate() + max_G = out.decode() + if max_G == "unlimited": + max_G =2500 + else: + max_G = int(max_G) - 40 + except Exception as error: + logger.debug(error) + max_G = 80 # max(20, len(Gdirs)/self.options['nb_core']) - AllEvent.add(pjoin(Gdir, 'events.lhe'), - result.get('xsec'), - result.get('xerru'), - result.get('axsec') - ) - - if len(AllEvent) >= 80: #perform a partial unweighting - AllEvent.unweight(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % partials), - 
get_wgt, log_level=5, trunc_error=1e-2, event_target=self.run_card['nevents']) - AllEvent = lhe_parser.MultiEventFile() - AllEvent.banner = self.banner - AllEvent.add(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % partials), - sum_xsec, - math.sqrt(sum(x**2 for x in sum_xerru)), - sum_axsec) - partials +=1 if not hasattr(self,'proc_characteristic'): self.proc_characteristic = self.get_characteristics() - if len(AllEvent) == 0: - nb_event = 0 - else: + mycluster = cluster.MultiCore(nb_core=self.options['nb_core']) + + def split(a, n): + """split a list "a" into n chunks of (nearly) the same size""" + k, m = divmod(len(a), n) + return (a[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n)) + + partials_info = [] + if len(Gdirs) >= max_G: + start_unweight= time.perf_counter() + # first check how many chunks we have to split into (always use a multiple of nb_core) + nb_split = 1 + nb_G = len(Gdirs) // (2* self.options['nb_core']) + while nb_G > min(80, max_G): + nb_split += 1 + nb_G = len(Gdirs)//(nb_split*2*self.options['nb_core']) + if nb_G < 10: + nb_split -=1 + nb_G = len(Gdirs)//(nb_split*2*self.options['nb_core']) + + #enforce at least 10 directories per thread + if nb_G > 10 or nb_split>1: + # do the unweighting of each chunk on its own thread + nb_chunk = (nb_split*2*self.options['nb_core']) + else: + nb_chunk = len(Gdirs) // 10 + nb_G =10 + + # safeguard in case the number of combined-event files is too large + if nb_chunk >= max_G: + nb_chunk = max_G -1 + nb_G = len(Gdirs) // nb_chunk + + for i, local_G in enumerate(split(Gdirs, nb_chunk)): + line = [pjoin(self.me_dir, "Events", self.run_name, "partials%d.lhe.gz" % i)] + line.append(pjoin(self.me_dir, 'Events', self.run_name, '%s_%s_banner.txt' % (self.run_name, tag))) + line.append(str(self.results.current['cross'])) + line += local_G + partials_info.append(self.do_combine_events_partial(' '.join(line), preprocess_only=True)) + mycluster.submit(sys.executable, + [pjoin(self.me_dir, 'bin', 'internal', 'madevent_interface.py'), 'combine_events_partial'] + line, + stdout='/dev/null' + ) + + starttime = time.time() + update_status = lambda idle, run, finish: \ + self.update_status((idle, run, finish, 'unweight'), level=None, + force=False, starttime=starttime) + mycluster.wait(self.me_dir, update_status) + # do the final combination + for data in partials_info: + AllEvent.add(*data) + + start_unweight= time.perf_counter() nb_event = AllEvent.unweight(pjoin(self.me_dir, "Events", self.run_name, 
"unweighted_events.lhe.gz"), + get_wgt, trunc_error=1e-2, event_target=self.run_card['nevents'], + log_level=logging.DEBUG, normalization=self.run_card['event_norm'], + proc_charac=self.proc_characteristic) + + if nb_event < self.run_card['nevents']: + logger.warning("failed to generate enough events. Please follow one of the following suggestions to fix the issue:") + logger.warning(" - set in the run_card.dat 'sde_strategy' to %s", 1 + self.run_card['sde_strategy'] % 2) + logger.warning(" - set in the run_card.dat 'hard_survey' to 1 or 2.") + logger.warning(" - reduce the number of requested events (if set too high)") + logger.warning(" - check that you do not have -integrable- singularity in your amplitude.") + logger.warning(" - regenerate your process directory by selecting another gauge (in particular try FD gauge).") - if nb_event < self.run_card['nevents']: - logger.warning("failed to generate enough events. Please follow one of the following suggestions to fix the issue:") - logger.warning(" - set in the run_card.dat 'sde_strategy' to %s", 1 + self.run_card['sde_strategy'] % 2) - logger.warning(" - set in the run_card.dat 'hard_survey' to 1 or 2.") - logger.warning(" - reduce the number of requested events (if set too high)") - logger.warning(" - check that you do not have -integrable- singularity in your amplitude.") - if partials: - for i in range(partials): - try: - os.remove(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % i)) - except Exception: - os.remove(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe" % i)) self.results.add_detail('nb_event', nb_event) @@ -3789,7 +3859,50 @@ def do_combine_events(self, line): logger.info("combination of events done in %s s ", time.time()-start) self.to_store.append('event') + + ############################################################################ + def do_combine_events_partial(self, line, preprocess_only=False): + """ """ + + AllEvent = lhe_parser.MultiEventFile() + + sum_xsec, sum_xerru, sum_axsec = 0,[],0 + output, banner_path,cross = line.split()[:3] + Gdirs = line.split()[3:] + + cross = float(cross) + if not self.banner: + self.banner = banner_mod.Banner(banner_path) + if not hasattr(self, 'run_card'): + self.run_card = banner_mod.RunCard(self.banner['mgruncard']) + AllEvent.banner = self.banner + + for Gdir in Gdirs: + if os.path.exists(pjoin(Gdir, 'events.lhe')): + result = sum_html.OneResult('') + result.read_results(pjoin(Gdir, 'results.dat')) + sum_xsec += result.get('xsec') + sum_xerru.append(result.get('xerru')) + sum_axsec += result.get('axsec') + + if self.run_card['gridpack'] or self.run_card['nevents']==0: + os.remove(pjoin(Gdir, 'events.lhe')) + continue + if not preprocess_only: + AllEvent.add(pjoin(Gdir, 'events.lhe'), + result.get('xsec'), + result.get('xerru'), + result.get('axsec') + ) + if preprocess_only: + return output, sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), sum_axsec + nb_event = max(min(abs(1.01*self.run_card['nevents']*sum_axsec/cross),self.run_card['nevents']), 10) + get_wgt = lambda event: event.wgt + AllEvent.unweight(output, + get_wgt, log_level=5, trunc_error=1e-2, event_target=nb_event) + return output, sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), sum_axsec + ############################################################################ def correct_bias(self): """check the first event and correct the weight by the bias @@ -3902,13 +4015,19 @@ def do_store_events(self, line): #except Exception: # continue # Store log - try: - if 
os.path.exists(pjoin(G_path, 'log.txt')): - input = pjoin(G_path, 'log.txt') + input = pjoin(G_path, 'log.txt') + if os.path.exists(input): + if self.run_card['keep_log'] not in ["none", "minimal"]: output = pjoin(G_path, '%s_log.txt' % run) - files.mv(input, output) - except Exception: - continue + try: + files.mv(input, output) + except Exception: + continue + elif self.run_card['keep_log'] == "none": + try: + os.remove(input) + except Exception: + continue #try: # # Grid # for name in ['ftn26']: @@ -3989,7 +4108,7 @@ def do_create_gridpack(self, line): misc.call(['./bin/internal/make_gridpack'], cwd=self.me_dir) files.mv(pjoin(self.me_dir, 'gridpack.tar.gz'), pjoin(self.me_dir, '%s_gridpack.tar.gz' % self.run_name)) - os.system("sed -i.bak \"s/\s*.true.*=.*GridRun/ .false. = GridRun/g\" %s/Cards/grid_card.dat" \ + os.system("sed -i.bak \"s/\\s*.true.*=.*GridRun/ .false. = GridRun/g\" %s/Cards/grid_card.dat" \ % self.me_dir) self.update_status('gridpack created', level='gridpack') @@ -4476,7 +4595,7 @@ def do_pythia8(self, line): else: preamble = misc.get_HEPTools_location_setter( pjoin(MG5DIR,'HEPTools'),'lib') - preamble += "\n unset PYTHIA8DATA\n" + #preamble += "\n unset PYTHIA8DATA\n" open(pythia_cmd_card,'w').write("""! ! It is possible to run this card manually with: @@ -4691,7 +4810,7 @@ def do_pythia8(self, line): # Make sure to sure the number of split_events determined during the splitting. split_PY8_Card.systemSet('Main:numberOfEvents',partition_for_PY8[i]) split_PY8_Card.systemSet('HEPMCoutput:scaling',split_PY8_Card['HEPMCoutput:scaling']* - (float(partition_for_PY8[i])/float(n_events))) + (float(partition_for_PY8[i]))) # Add_missing set to False so as to be sure not to add any additional parameter w.r.t # the ones in the original PY8 param_card copied. split_PY8_Card.write(pjoin(parallelization_dir,'PY8Card_%d.dat'%i), @@ -4963,9 +5082,9 @@ def wait_monitoring(Idle, Running, Done): if cross_sections: # Filter the cross_sections specified an keep only the ones # with central parameters and a different merging scale - a_float_re = '[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' + a_float_re = r'[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' central_merging_re = re.compile( - '^\s*Weight_MERGING\s*=\s*(?P%s)\s*$'%a_float_re, + r'^\s*Weight_MERGING\s*=\s*(?P%s)\s*$'%a_float_re, re.IGNORECASE) cross_sections = dict( (float(central_merging_re.match(xsec).group('merging')),value) @@ -5016,8 +5135,8 @@ def wait_monitoring(Idle, Running, Done): def parse_PY8_log_file(self, log_file_path): """ Parse a log file to extract number of event and cross-section. 
""" - pythiare = re.compile("Les Houches User Process\(es\)\s*\d+\s*\|\s*(?P\d+)\s*(?P\d+)\s*(?P\d+)\s*\|\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") - pythia_xsec_re = re.compile("Inclusive cross section\s*:\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") + pythiare = re.compile(r"Les Houches User Process\(es\)\s*\d+\s*\|\s*(?P\d+)\s*(?P\d+)\s*(?P\d+)\s*\|\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") + pythia_xsec_re = re.compile(r"Inclusive cross section\s*:\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") sigma_m, Nacc, Ntry = None, None, None for line in misc.BackRead(log_file_path): info = pythiare.search(line) @@ -5158,7 +5277,7 @@ def do_pythia(self, line): # read the line from the bottom of the file #pythia_log = misc.BackRead(pjoin(self.me_dir,'Events', self.run_name, # '%s_pythia.log' % tag)) - pythiare = re.compile("\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") + pythiare = re.compile(r"\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") for line in misc.reverse_readline(pjoin(self.me_dir,'Events', self.run_name, '%s_pythia.log' % tag)): info = pythiare.search(line) @@ -5619,8 +5738,8 @@ def launch_job(self,exe, cwd=None, stdout=None, argument = [], remaining=0, input_files.append(self.get_pdf_input_filename()) #Find the correct ajob - Gre = re.compile("\s*j=(G[\d\.\w]+)") - origre = re.compile("grid_directory=(G[\d\.\w]+)") + Gre = re.compile(r"\s*j=(G[\d\.\w]+)") + origre = re.compile(r"grid_directory=(G[\d\.\w]+)") try : fsock = open(exe) except Exception: @@ -5628,7 +5747,7 @@ def launch_job(self,exe, cwd=None, stdout=None, argument = [], remaining=0, text = fsock.read() output_files = Gre.findall(text) if not output_files: - Ire = re.compile("for i in ([\d\.\s]*) ; do") + Ire = re.compile(r"for i in ([\d\.\s]*) ; do") data = Ire.findall(text) data = ' '.join(data).split() for nb in data: @@ -6035,7 +6154,7 @@ def get_last_tag(self, level): 'syscalc':[], 'rivet':['rivet']} - if name == self.run_name: + if name and name == self.run_name: if reload_card: run_card = pjoin(self.me_dir, 'Cards','run_card.dat') self.run_card = banner_mod.RunCard(run_card) @@ -6334,7 +6453,7 @@ def run_syscalc(self, mode='parton', event_path=None, output=None): elif mode == 'Pythia': stdout = open(pjoin(event_dir, self.run_name, '%s_%s_syscalc.log' % (tag,mode)),'w') if 'mgpythiacard' in self.banner: - pat = re.compile('''^\s*qcut\s*=\s*([\+\-\d.e]*)''', re.M+re.I) + pat = re.compile(r'''^\s*qcut\s*=\s*([\+\-\d.e]*)''', re.M+re.I) data = pat.search(self.banner['mgpythiacard']) if data: qcut = float(data.group(1)) @@ -6611,7 +6730,7 @@ def get_subP_ids(path): for line in open(pjoin(path, 'leshouche.inc')): if not 'IDUP' in line: continue - particles = re.search("/([\d,-]+)/", line) + particles = re.search(r"/([\d,-]+)/", line) all_ids.append([int(p) for p in particles.group(1).split(',')]) return all_ids @@ -6899,6 +7018,7 @@ def do_combine_events(self, line): partials = 0 # if too many file make some partial unweighting sum_xsec, sum_xerru, sum_axsec = 0,[],0 + partials_info = [] Gdirs = self.get_Gdir() Gdirs.sort() for Gdir in Gdirs: @@ -6917,16 +7037,21 @@ def do_combine_events(self, line): sum_axsec += result.get('axsec')*gscalefact[Gdir] if len(AllEvent) >= 80: #perform a partial unweighting + nb_event = min(abs(1.01*self.nb_event*sum_axsec/self.results.current['cross']),self.run_card['nevents']) AllEvent.unweight(pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), - get_wgt, log_level=5, trunc_error=1e-2, event_target=self.nb_event) + 
get_wgt, log_level=5, trunc_error=1e-2, event_target=nb_event) AllEvent = lhe_parser.MultiEventFile() AllEvent.banner = self.banner - AllEvent.add(pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), + partials_info.append((pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), - sum_axsec) + sum_axsec) ) + sum_xsec, sum_xerru, sum_axsec = 0,[],0 partials +=1 + for data in partials_info: + AllEvent.add(*data) + if not hasattr(self,'proc_characteristic'): self.proc_characteristic = self.get_characteristics() diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/misc.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/misc.py index c4c669f36b..e7fd60be0d 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/misc.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/misc.py @@ -67,7 +67,7 @@ def parse_info_str(fsock): """ info_dict = {} - pattern = re.compile("(?P\w*)\s*=\s*(?P.*)", + pattern = re.compile(r"(?P\w*)\s*=\s*(?P.*)", re.IGNORECASE | re.VERBOSE) for entry in fsock: entry = entry.strip() @@ -84,7 +84,7 @@ def parse_info_str(fsock): def glob(name, path=''): """call to glob.glob with automatic security on path""" import glob as glob_module - path = re.sub('(?P\?|\*|\[|\])', '[\g]', path) + path = re.sub(r'(?P\?|\*|\[|\])', r'[\g]', path) return glob_module.glob(pjoin(path, name)) #=============================================================================== @@ -614,10 +614,10 @@ def mod_compilator(directory, new='gfortran', current=None, compiler_type='gfort #search file file_to_change=find_makefile_in_dir(directory) if compiler_type == 'gfortran': - comp_re = re.compile('^(\s*)FC\s*=\s*(.+)\s*$') + comp_re = re.compile(r'^(\s*)FC\s*=\s*(.+)\s*$') var = 'FC' elif compiler_type == 'cpp': - comp_re = re.compile('^(\s*)CXX\s*=\s*(.+)\s*$') + comp_re = re.compile(r'^(\s*)CXX\s*=\s*(.+)\s*$') var = 'CXX' else: MadGraph5Error, 'Unknown compiler type: %s' % compiler_type @@ -861,9 +861,9 @@ def detect_current_compiler(path, compiler_type='fortran'): # comp = re.compile("^\s*FC\s*=\s*(\w+)\s*") # The regular expression below allows for compiler definition with absolute path if compiler_type == 'fortran': - comp = re.compile("^\s*FC\s*=\s*([\w\/\\.\-]+)\s*") + comp = re.compile("^\\s*FC\\s*=\\s*([\\w\\/\\.\\-]+)\\s*") elif compiler_type == 'cpp': - comp = re.compile("^\s*CXX\s*=\s*([\w\/\\.\-]+)\s*") + comp = re.compile("^\\s*CXX\\s*=\\s*([\\w\\/\\.\\-]+)\\s*") else: MadGraph5Error, 'Unknown compiler type: %s' % compiler_type @@ -1001,7 +1001,19 @@ def call_stdout(arg, *args, **opt): def copytree(src, dst, symlinks = False, ignore = None): if not os.path.exists(dst): os.makedirs(dst) - shutil.copystat(src, dst) + try: + shutil.copystat(src, dst) + except PermissionError: + if os.path.realpath(src).startswith('/cvmfs') and os.path.realpath(dst).startswith('/afs'): + # allow a permission mismatch from cvmfs to afs, since it does not seem to cause issues --at least in general-- + logger.critical(f'Ignoring that we could not copy permissions from {src} to {dst}') + else: + logger.critical(f'Permission error detected from {src} to {dst}.\n'+\ + 'If you are using WSL with a Windows partition, please try using python3.12\n'+\ + 'or avoid moving your data from the WSL partition to the UNIX one') + # we do not have enough experience with WSL to let this go through. + raise + lst = os.listdir(src) if ignore: excl = ignore(src, lst) @@ -1895,12 +1907,12 @@ class EasterEgg(object): May4_banner = "* _____ *\n" + \ "* ,-~\" \"~-. *\n" + \ "* * ,^ ___ ^. 
* *\n" + \ - "* * / .^ ^. \ * *\n" + \ + "* * / .^ ^. \\ * *\n" + \ "* * Y l o ! Y * *\n" + \ "* * l_ `.___.' _,[ * *\n" + \ "* * |^~\"--------------~\"\"^| * *\n" + \ "* * ! May the 4th ! * *\n" + \ - "* * \ / * *\n" + \ + "* * \\ / * *\n" + \ "* * ^. .^ * *\n" + \ "* * \"-.._____.,-\" * *\n" @@ -1909,13 +1921,13 @@ class EasterEgg(object): "* M::::::::::M M::::::::::M *\n" + \ "* M:::::::::::M M:::::::::::M (_)___ *\n" + \ "* M:::::::M::::M M::::M:::::::M | / __| *\n" + \ - "* M::::::M M::::M M::::M M::::::M | \__ \ *\n" + \ + "* M::::::M M::::M M::::M M::::::M | \\__ \\ *\n" + \ "* M::::::M M::::M::::M M::::::M |_|___/ *\n" + \ "* M::::::M M:::::::M M::::::M *\n" + \ "* M::::::M M:::::M M::::::M / _| ___ _ __ *\n" + \ - "* M::::::M MMMMM M::::::M | |_ / _ \| '__| *\n" + \ + "* M::::::M MMMMM M::::::M | |_ / _ \\| '__| *\n" + \ "* M::::::M M::::::M | _| (_) | | *\n" + \ - "* M::::::M M::::::M |_/\/\___/|_| *\n" + \ + "* M::::::M M::::::M |_/\\/\\___/|_| *\n" + \ "* M::::::M M::::::M *\n" + \ "* MMMMMMMM MMMMMMMM *\n" + \ "* *\n" + \ @@ -2233,39 +2245,51 @@ def import_python_lhapdf(lhapdfconfig): os.environ['LD_LIBRARY_PATH'] = lhapdf_libdir else: os.environ['LD_LIBRARY_PATH'] = '%s:%s' %(lhapdf_libdir,os.environ['LD_LIBRARY_PATH']) - try: candidates=[dirname for dirname in os.listdir(lhapdf_libdir) \ if os.path.isdir(os.path.join(lhapdf_libdir,dirname))] except OSError: candidates=[] + if os.path.isdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')): + candidates += [pjoin(os.pardir,'local', 'lib', dirname) for dirname in os.listdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')) + if os.path.isdir(os.path.join(lhapdf_libdir,os.pardir, 'local', 'lib', dirname))] for candidate in candidates: - if os.path.isdir(os.path.join(lhapdf_libdir,candidate,'site-packages')): - sys.path.insert(0,os.path.join(lhapdf_libdir,candidate,'site-packages')) - try: - import lhapdf - use_lhapdf=True - break - except ImportError: - sys.path.pop(0) - continue + for subdir in ['site-packages', 'dist-packages']: + if os.path.isdir(os.path.join(lhapdf_libdir,candidate, subdir)): + sys.path.insert(0,os.path.join(lhapdf_libdir,candidate, subdir)) + try: + import lhapdf + use_lhapdf=True + break + except ImportError as error: + sys.path.pop(0) + continue + else: + continue + break if not use_lhapdf: try: candidates=[dirname for dirname in os.listdir(lhapdf_libdir+'64') \ if os.path.isdir(os.path.join(lhapdf_libdir+'64',dirname))] except OSError: candidates=[] - + if os.path.isdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib64')): + candidates += [pjoin(os.pardir,'local', 'lib64', dirname) for dirname in os.listdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')) + if os.path.isdir(os.path.join(lhapdf_libdir,os.pardir, 'local', 'lib64', dirname))] for candidate in candidates: - if os.path.isdir(os.path.join(lhapdf_libdir+'64',candidate,'site-packages')): - sys.path.insert(0,os.path.join(lhapdf_libdir+'64',candidate,'site-packages')) - try: - import lhapdf - use_lhapdf=True - break - except ImportError: - sys.path.pop(0) - continue + for subdir in ['site-packages', 'dist-packages']: + if os.path.isdir(os.path.join(lhapdf_libdir+'64',candidate, subdir)): + sys.path.insert(0,os.path.join(lhapdf_libdir+'64',candidate, subdir)) + try: + import lhapdf + use_lhapdf=True + break + except ImportError as error: + sys.path.pop(0) + continue + else: + continue + break if not use_lhapdf: try: import lhapdf diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/shower_card.py 
b/epochX/cudacpp/ee_mumu.mad/bin/internal/shower_card.py index e87d534177..16ed72b1a0 100755 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/shower_card.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/shower_card.py @@ -288,7 +288,7 @@ def write(self, output_file, template=None, python_template=False, self.text = open(template,'r').read() - key_re = re.compile('^(\s*)([\S^#]+)(\s*)=(\s*)([^#]*?)(\s*)(\#.*|$)') + key_re = re.compile(r'^(\s*)([\S^#]+)(\s*)=(\s*)([^#]*?)(\s*)(\#.*|$)') newlines = [] for line in self.text.split('\n'): key_match = key_re.findall(line) diff --git a/epochX/cudacpp/ee_mumu.mad/bin/internal/systematics.py b/epochX/cudacpp/ee_mumu.mad/bin/internal/systematics.py index 28eaed00e2..2acebd087c 100644 --- a/epochX/cudacpp/ee_mumu.mad/bin/internal/systematics.py +++ b/epochX/cudacpp/ee_mumu.mad/bin/internal/systematics.py @@ -582,7 +582,7 @@ def print_cross_sections(self, all_cross, nb_event, stdout): else: resume.write( '#PDF %s: %g +%2.3g%% -%2.3g%%\n' % (pdfset.name, pdferr.central,pdferr.errplus*100/all_cross[0], pdferr.errminus*100/all_cross[0])) - dyn_name = {1: '\sum ET', 2:'\sum\sqrt{m^2+pt^2}', 3:'0.5 \sum\sqrt{m^2+pt^2}',4:'\sqrt{\hat s}' } + dyn_name = {1: r'\sum ET', 2:r'\sum\sqrt{m^2+pt^2}', 3:r'0.5 \sum\sqrt{m^2+pt^2}',4:r'\sqrt{\hat s}' } for key, curr in dyns.items(): if key ==-1: continue @@ -789,7 +789,7 @@ def get_id(self): return int(self.start_wgt_id) if 'initrwgt' in self.banner: - pattern = re.compile(' set lhapdf /PATH/TO/lhapdf-config -None does not seem to correspond to a valid lhapdf-config executable. -Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). -Note that you can still compile and run aMC@NLO with the built-in PDFs - MG5_aMC> set lhapdf /PATH/TO/lhapdf-config - Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +58,7 @@ generate e+ e- > mu+ mu- No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.0058383941650390625  +DEBUG: model prefixing takes 0.00561213493347168  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -154,17 +150,16 @@ INFO: Checking for minimal orders which gives processes. INFO: Please specify coupling orders to bypass this step. INFO: Trying process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Process has 2 diagrams -1 processes with 2 diagrams generated in 0.004 s +1 processes with 2 diagrams generated in 0.005 s Total: 1 processes with 2 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_ee_mumu Load PLUGIN.CUDACPP_OUTPUT -Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.5.3_lo_vect. +Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT -DEBUG: cformat =  plugin [export_cpp.py at line 3070]  DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 165]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: e+ e- > mu+ mu- WEIGHTED<=4 @1 INFO: Processing color information for process: e+ e- > mu+ mu- @1 @@ -173,17 +168,17 @@ INFO: Processing color information for process: e+ e- > mu+ mu- @1 DEBUG: type(fortran_model)= [output.py at line 214]  DEBUG: type(me)= me=0 [output.py at line 215]  DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 216]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/SubProcesses/P1_Sigma_sm_epem_mupmum/. 
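A note on the bin/internal hunks above (misc.py, shower_card.py, systematics.py): they all make the same mechanical change, adding an r prefix so that regex and LaTeX-flavoured string literals become raw strings. The likely motivation, stated here as an assumption, is that Python 3.12 promoted invalid escape sequences such as \s or \w from DeprecationWarning to the far more visible SyntaxWarning, ahead of making them hard errors in a future release. A minimal sketch of the before and after:

    import re

    # Before: re.compile('^(\s*)FC\s*=\s*(.+)\s*$') still works, but '\s'
    # is an invalid string escape: DeprecationWarning up to Python 3.11,
    # a visible SyntaxWarning from 3.12 onwards.
    # After: the raw string parses to the identical pattern, so the regex
    # behaviour is unchanged.
    pattern = re.compile(r'^(\s*)FC\s*=\s*(.+)\s*$')

    m = pattern.match('  FC = /usr/bin/gfortran')
    assert m is not None and m.group(2) == '/usr/bin/gfortran'

The detect_current_compiler hunk reaches the same end state by doubling every backslash instead of adding the prefix; both spellings compile to the same pattern.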
Generated helas calls for 1 subprocesses (2 diagrams) in 0.004 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates FFV4 routines ALOHA: aloha creates FFV2_4 routines -ALOHA: aloha creates 4 routines in 0.269 s +ALOHA: aloha creates 4 routines in 0.281 s FFV1 FFV1 FFV2 @@ -192,17 +187,17 @@ ALOHA: aloha creates 4 routines in 0.269 s FFV4 FFV2_4 FFV2_4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_ee_mumu/src/. quit -real 0m0.667s -user 0m0.596s -sys 0m0.059s -Code generation completed in 1 seconds +real 0m1.777s +user 0m1.371s +sys 0m0.095s +Code generation completed in 2 seconds diff --git a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt index efc4366a5c..451b75637f 100644 --- a/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.mad/CODEGEN_mad_gg_tt_log.txt @@ -2,7 +2,6 @@ This version is intended for development/beta testing and NOT for production. This version has not been fully tested (if at all) and might have limited user support (if at all) Running MG5 in debug mode -('WARNING: loading of madgraph too slow!!!', 0.986461877822876) ************************************************************ * * * W E L C O M E to * @@ -49,7 +48,7 @@ Note that you can still compile and run aMC@NLO with the built-in PDFs Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -58,7 +57,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005642414093017578  +DEBUG: model prefixing takes 0.005722761154174805  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -158,18 +157,17 @@ Load PLUGIN.CUDACPP_OUTPUT Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT -DEBUG: args =  ['--hel_recycling=False', '--vector_size=32', '--hel_recycling=False', '--me_exporter=standalone_simd', '--nb_wrap=1'] [madgraph_interface.py at line 8230]  Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT +DEBUG: opt['output_options']['vector_size'] =  32 [export_v4.py at line 4334]  Output will be done with PLUGIN: CUDACPP_OUTPUT -DEBUG: cformat =  standalone_simd [export_cpp.py at line 3481]  DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 165]  INFO: initialize a new directory: CODEGEN_mad_gg_tt INFO: remove old information in CODEGEN_mad_gg_tt DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 @@ -179,59 +177,60 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
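The copytree() change in the misc.py hunk further up narrows a real failure mode: shutil.copystat raises PermissionError on destinations that refuse chmod/utime, which is typical when copying from read-only /cvmfs into /afs and also shows up on WSL-mounted Windows partitions. A sketch of the same pattern, with the helper name invented for illustration:

    import logging, os, shutil

    logger = logging.getLogger(__name__)

    def copystat_best_effort(src, dst):
        # Tolerate the one case known to be benign (cvmfs -> afs, where the
        # metadata cannot be preserved anyway) and re-raise everywhere else
        # so that genuine problems, e.g. on WSL, stay visible.
        try:
            shutil.copystat(src, dst)
        except PermissionError:
            if (os.path.realpath(src).startswith('/cvmfs')
                    and os.path.realpath(dst).startswith('/afs')):
                logger.critical('could not copy permissions from %s to %s',
                                src, dst)
            else:
                raise

Testing os.path.realpath rather than the raw arguments matters here: a symlinked working directory would otherwise defeat the /cvmfs prefix check.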
-DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  -DEBUG: subproc_number =  0 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx DEBUG: len(subproc_diagrams_for_config) =  3 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1548]  Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.094 s +Wrote files for 10 helas calls in 0.073 s +DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.139 s +ALOHA: aloha creates 2 routines in 0.149 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.137 s +ALOHA: aloha creates 4 routines in 0.135 s VVV1 FFV1 FFV1 FFV1 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. 
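Also from the misc.py diff: import_python_lhapdf now probes dist-packages next to site-packages, and additionally scans ../local/lib relative to the LHAPDF libdir (presumably to cover installations that place the Python bindings there). The nested for/else/break in the patch is easy to misread; the control flow amounts to this sketch:

    import importlib, os, sys

    def find_importable(libdirs, module='lhapdf',
                        subdirs=('site-packages', 'dist-packages')):
        # Return the first libdir/candidate/subdir from which `module`
        # imports, or None. Sketch of the search in import_python_lhapdf.
        for libdir in libdirs:
            try:
                candidates = [d for d in os.listdir(libdir)
                              if os.path.isdir(os.path.join(libdir, d))]
            except OSError:
                continue
            for cand in candidates:
                for sub in subdirs:
                    path = os.path.join(libdir, cand, sub)
                    if not os.path.isdir(path):
                        continue
                    sys.path.insert(0, path)
                    try:
                        importlib.import_module(module)
                        return path  # success: leave the path on sys.path
                    except ImportError:
                        sys.path.pop(0)  # undo the speculative insert
        return None

Returning early replaces the stacked 'else: continue / break' that the original needs to escape two loops at once; the observable behaviour is the same. Note in passing that the lib64 branch of the patch guards on ../local/lib64 but then lists ../local/lib, which looks like a copy-paste slip.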
If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file SubProcesses/makefile -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f +Hunk #2 succeeded at 227 (offset 13 lines). DEBUG: p.returncode =  0 [output.py at line 258]  -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/README +/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/README Run "open index.html" to see more information about this process. quit -real 0m2.907s -user 0m2.416s -sys 0m0.299s +real 0m1.917s +user 0m1.636s +sys 0m0.269s Code generation completed in 2 seconds ************************************************************ * * @@ -253,9 +252,9 @@ Code generation completed in 2 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt @@ -283,9 +282,9 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt diff --git a/epochX/cudacpp/gg_tt.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_tt.mad/Cards/me5_configuration.txt index 68b4c46295..4f5079f78a 100644 --- a/epochX/cudacpp/gg_tt.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_tt.mad/Cards/me5_configuration.txt @@ -235,7 +235,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gg_tt.mad/Source/MODEL/couplings.f b/epochX/cudacpp/gg_tt.mad/Source/MODEL/couplings.f index 748af3ae72..04d6bb5333 100644 --- a/epochX/cudacpp/gg_tt.mad/Source/MODEL/couplings.f +++ b/epochX/cudacpp/gg_tt.mad/Source/MODEL/couplings.f @@ -10,10 +10,13 @@ SUBROUTINE COUP() PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + LOGICAL UPDATELOOP COMMON /TO_UPDATELOOP/UPDATELOOP INCLUDE 'input.inc' - INCLUDE '../vector.inc' + + INCLUDE 'coupl.inc' READLHA = .TRUE. 
INCLUDE 'intparam_definition.inc' @@ -53,7 +56,11 @@ SUBROUTINE UPDATE_AS_PARAM(VECID) INCLUDE '../maxparticles.inc' INCLUDE '../cuts.inc' + + INCLUDE '../vector.inc' + + INCLUDE '../run.inc' DOUBLE PRECISION ALPHAS @@ -86,7 +93,9 @@ SUBROUTINE UPDATE_AS_PARAM2(MU_R2,AS2 ,VECID) INTEGER VECID INCLUDE 'model_functions.inc' INCLUDE 'input.inc' + INCLUDE '../vector.inc' + INCLUDE 'coupl.inc' DOUBLE PRECISION MODEL_SCALE COMMON /MODEL_SCALE/MODEL_SCALE diff --git a/epochX/cudacpp/gg_tt.mad/Source/MODEL/couplings1.f b/epochX/cudacpp/gg_tt.mad/Source/MODEL/couplings1.f index e14f3a1770..72cfa0f6e4 100644 --- a/epochX/cudacpp/gg_tt.mad/Source/MODEL/couplings1.f +++ b/epochX/cudacpp/gg_tt.mad/Source/MODEL/couplings1.f @@ -7,11 +7,12 @@ SUBROUTINE COUP1( ) IMPLICIT NONE INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' END diff --git a/epochX/cudacpp/gg_tt.mad/Source/MODEL/couplings2.f b/epochX/cudacpp/gg_tt.mad/Source/MODEL/couplings2.f index aa02de33c5..30f3a04e3b 100644 --- a/epochX/cudacpp/gg_tt.mad/Source/MODEL/couplings2.f +++ b/epochX/cudacpp/gg_tt.mad/Source/MODEL/couplings2.f @@ -7,11 +7,12 @@ SUBROUTINE COUP2( ) IMPLICIT NONE INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' END diff --git a/epochX/cudacpp/gg_tt.mad/Source/MODEL/couplings3.f b/epochX/cudacpp/gg_tt.mad/Source/MODEL/couplings3.f index c77bd60b5a..2d4127fa27 100644 --- a/epochX/cudacpp/gg_tt.mad/Source/MODEL/couplings3.f +++ b/epochX/cudacpp/gg_tt.mad/Source/MODEL/couplings3.f @@ -7,12 +7,13 @@ SUBROUTINE COUP3( VECID) IMPLICIT NONE INTEGER VECID INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' GC_10(VECID) = -G GC_11(VECID) = MDL_COMPLEXI*G diff --git a/epochX/cudacpp/gg_tt.mad/Source/MODEL/makefile b/epochX/cudacpp/gg_tt.mad/Source/MODEL/makefile index 0b24445a53..5a5681d220 100644 --- a/epochX/cudacpp/gg_tt.mad/Source/MODEL/makefile +++ b/epochX/cudacpp/gg_tt.mad/Source/MODEL/makefile @@ -3,7 +3,7 @@ # Makefile for model library # # ---------------------------------------------------------------------------- - +# template models/template_files/makefile_madevent # Check for ../make_opts ifeq ($(wildcard ../make_opts), ../make_opts) include ../make_opts diff --git a/epochX/cudacpp/gg_tt.mad/Source/MODEL/printout.f b/epochX/cudacpp/gg_tt.mad/Source/MODEL/printout.f index 18b8f35b08..3e195b03a2 100644 --- a/epochX/cudacpp/gg_tt.mad/Source/MODEL/printout.f +++ b/epochX/cudacpp/gg_tt.mad/Source/MODEL/printout.f @@ -1,3 +1,7 @@ +ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc +c written by the UFO converter +ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc + c************************************************************************ c** ** c** MadGraph/MadEvent Interface to FeynRules ** @@ -9,7 +13,7 @@ subroutine printout implicit none - include '../vector.inc' ! defines VECSIZE_MEMMAX + include '../vector.inc' ! VECSIZE_MEMMAX (needed by coupl.inc) include 'coupl.inc' ! 
needs VECSIZE_MEMMAX (defined in vector.inc) include 'input.inc' diff --git a/epochX/cudacpp/gg_tt.mad/Source/PDF/eepdf.inc b/epochX/cudacpp/gg_tt.mad/Source/PDF/eepdf.inc index a0183e49ee..d50d8c62b3 100644 --- a/epochX/cudacpp/gg_tt.mad/Source/PDF/eepdf.inc +++ b/epochX/cudacpp/gg_tt.mad/Source/PDF/eepdf.inc @@ -4,6 +4,9 @@ integer n_ee parameter (n_ee = 4) ! arrays to store the components before combining them + ! note this common is very quickly overwritten do not use + ! use such common outside of auto_dsig.f double precision ee_components(n_ee) common / to_ee_components / ee_components + diff --git a/epochX/cudacpp/gg_tt.mad/Source/PDF/pdfwrap_emela.f b/epochX/cudacpp/gg_tt.mad/Source/PDF/pdfwrap_emela.f index bce10819d5..da1e4e2276 100644 --- a/epochX/cudacpp/gg_tt.mad/Source/PDF/pdfwrap_emela.f +++ b/epochX/cudacpp/gg_tt.mad/Source/PDF/pdfwrap_emela.f @@ -13,7 +13,7 @@ SUBROUTINE PDFWRAP DOUBLE PRECISION VALUE(20) REAL*8 ALPHASPDF EXTERNAL ALPHASPDF - ! PDFs with beamstrahlung use specific initialisation/evaluation +C PDFs with beamstrahlung use specific initialisation/evaluation LOGICAL HAS_BSTRAHL COMMON /TO_HAS_BS/ HAS_BSTRAHL diff --git a/epochX/cudacpp/gg_tt.mad/Source/PDF/pdg2pdf.f b/epochX/cudacpp/gg_tt.mad/Source/PDF/pdg2pdf.f index 46f321e66b..2e7343a69b 100644 --- a/epochX/cudacpp/gg_tt.mad/Source/PDF/pdg2pdf.f +++ b/epochX/cudacpp/gg_tt.mad/Source/PDF/pdg2pdf.f @@ -108,7 +108,7 @@ double precision function pdg2pdf(ih,ipdg,beamid,x,xmu) else if (abs(ipart).eq.10) then ipart = sign(1,ipart) * 15 endif - pdg2pdf = 0d0 + pdg2pdf = 0d0 if (beamid.lt.0) then ih_local = ipart @@ -122,7 +122,7 @@ double precision function pdg2pdf(ih,ipdg,beamid,x,xmu) endif do i_ee = 1, n_ee ee_components(i_ee) = compute_eepdf(x,omx_ee(iabs(beamid)),xmu,i_ee,ipart,ih_local) - enddo + enddo pdg2pdf = ee_components(1) ! temporary to test pdf load c write(*,*), x, beamid ,omx_ee(iabs(beamid)),xmu,1,ipart,ih_local,pdg2pdf return diff --git a/epochX/cudacpp/gg_tt.mad/Source/eepdf.inc b/epochX/cudacpp/gg_tt.mad/Source/eepdf.inc index a0183e49ee..d50d8c62b3 100644 --- a/epochX/cudacpp/gg_tt.mad/Source/eepdf.inc +++ b/epochX/cudacpp/gg_tt.mad/Source/eepdf.inc @@ -4,6 +4,9 @@ integer n_ee parameter (n_ee = 4) ! arrays to store the components before combining them + ! note this common is very quickly overwritten do not use + ! 
use such common outside of auto_dsig.f double precision ee_components(n_ee) common / to_ee_components / ee_components + diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f index 4def093ebd..51cde595e2 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f @@ -101,6 +101,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -126,7 +128,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) IF (ABS(LPP(IB(1))).GE.1) THEN - !LP=SIGN(1,LPP(IB(1))) +C LP=SIGN(1,LPP(IB(1))) IF (DSQRT(Q2FACT(IB(1))).EQ.0D0) THEN QSCALE=0D0 DO I=3,NEXTERNAL @@ -140,7 +142,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) G1=PDG2PDF(LPP(IB(1)),0, IB(1),XBK(IB(1)), QSCALE) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN - !LP=SIGN(1,LPP(IB(2))) +C LP=SIGN(1,LPP(IB(2))) IF (DSQRT(Q2FACT(IB(2))).NE.0D0) THEN QSCALE=DSQRT(Q2FACT(IB(2))) ENDIF @@ -281,7 +283,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -320,9 +322,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -338,6 +341,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -366,12 +371,12 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DO IWARP=1, WARP_SIZE IVEC = (CURR_WARP-1)*WARP_SIZE+IWARP IF (ABS(LPP(IB(1))).GE.1) THEN - !LP=SIGN(1,LPP(IB(1))) +C LP=SIGN(1,LPP(IB(1))) G1(IVEC)=PDG2PDF(LPP(IB(1)),0, IB(1),ALL_XBK(IB(1),IVEC) $ ,DSQRT(ALL_Q2FACT(IB(1), IVEC))) ENDIF IF (ABS(LPP(IB(2))).GE.1) THEN - !LP=SIGN(1,LPP(IB(2))) +C LP=SIGN(1,LPP(IB(2))) G2(IVEC)=PDG2PDF(LPP(IB(2)),0, IB(2),ALL_XBK(IB(2),IVEC) $ ,DSQRT(ALL_Q2FACT(IB(2), IVEC))) ENDIF @@ -485,11 +490,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -589,9 +589,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! 
Cudacpp/Fortran mismatch', @@ -704,3 +706,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/driver.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/driver.f index 27a6e46742..ec5722702a 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/driver.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f index 069c74ef46..2233869649 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f @@ -149,18 +149,20 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=2 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=2 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 ENDDO - ! If the helicity grid status is 0, this means that it is not yet initialized. - ! If HEL_PICKED==-1, this means that calls to other matrix where in initialization mode as well for the helicity. +C If the helicity grid status is 0, this means that it is not yet +C initialized. +C If HEL_PICKED==-1, this means that calls to other matrix +C where in initialization mode as well for the helicity. IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN @@ -182,16 +184,27 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN - ! We set HEL_PICKED to -1 here so that later on, the call to DS_add_point in dsample.f does not add anything to the grid since it was already done here. +C We set HEL_PICKED to -1 here so that later on, the call to +C DS_add_point in dsample.f does not add anything to the grid +C since it was already done here. HEL_PICKED = -1 - ! For safety, hardset the helicity sampling jacobian to 0.0d0 to make sure it is not . +C For safety, hardset the helicity sampling jacobian to 0.0d0 +C to make sure it is not . HEL_JACOBIAN = 1.0D0 - ! We don't want to re-update the helicity grid if it was already updated by another matrix, so we make sure that the reference grid is empty. 
+C We don't want to re-update the helicity grid if it was +C already updated by another matrix, so we make sure that +C the reference grid is empty. REF_HELICITY_GRID = DS_GET_DIMENSION(REF_GRID,'Helicity') IF((DS_GET_DIM_STATUS('Helicity').EQ.1) $ .AND.(REF_HELICITY_GRID%N_TOT_ENTRIES.EQ.0)) THEN - ! If we finished the initialization we can update the grid so as to start sampling over it. - ! However the grid will now be filled by dsample with different kind of weights (including pdf, flux, etc...) so by setting the grid_mode of the reference grid to 'initialization' we make sure it will be overwritten (as opposed to 'combined') by the running grid at the next update. +C If we finished the initialization we can update the grid +C so as to start sampling over it. +C However the grid will now be filled by dsample with +C different kind of weights (including pdf, flux, etc...) +C so by setting the grid_mode of the reference grid to +C 'initialization' we make sure it will be overwritten (as +C opposed to 'combined') by the running grid at the next +C update. CALL DS_UPDATE_GRID('Helicity') CALL DS_SET_GRID_MODE('Helicity','init') ENDIF diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/cluster.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/cluster.f index 649e46f4e9..b8995283ed 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/cluster.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/cluster.f @@ -552,6 +552,8 @@ logical function cluster(p, ivec) if (btest(mlevel,1)) $ write (*,*)'New event' + iwin = 0 + jwin = 0 cluster=.false. clustered=.false. do i=0,3 @@ -663,7 +665,8 @@ logical function cluster(p, ivec) c initialize graph storage igraphs(0)=0 nleft=nexternal -c cluster +c cluster + if (iwin.eq.0.or.jwin.eq.0) stop 21 do n=1,nexternal-2 c combine winner imocl(n)=imap(iwin,2)+imap(jwin,2) diff --git a/epochX/cudacpp/gg_tt.mad/SubProcesses/reweight.f b/epochX/cudacpp/gg_tt.mad/SubProcesses/reweight.f index 0d57772e28..0a0bafa7c1 100644 --- a/epochX/cudacpp/gg_tt.mad/SubProcesses/reweight.f +++ b/epochX/cudacpp/gg_tt.mad/SubProcesses/reweight.f @@ -640,8 +640,7 @@ logical function setclscales(p, keepq2bck, ivec) c In case of weird behavior, uncomment line c111 to debug/investigate goodjet(:) = .false. c111 set_goodjet(:) = .false. 
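The cluster.f hunk just above is a defensive fix: iwin and jwin are now zero-initialised and the code aborts with 'stop 21' if the winner-selection loop never assigned them, where previously the combination loop could index imap with uninitialised values. The same shape in Python terms (this file set is Fortran, so this is purely illustrative):

    def cluster(pairs, measure):
        # Pick the minimal-measure pair, failing loudly if none qualifies;
        # mirrors the iwin/jwin sentinel guard added to cluster.f.
        iwin = jwin = None            # sentinel, like iwin = 0 / jwin = 0
        best = float('inf')
        for i, j in pairs:
            y = measure(i, j)
            if y < best:
                best, iwin, jwin = y, i, j
        if iwin is None or jwin is None:   # if (iwin.eq.0.or.jwin.eq.0) stop 21
            raise RuntimeError('cluster: no winner found (stop 21)')
        return iwin, jwin, best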
- - if(ickkw.le.0.and.xqcut.le.0d0.and.q2fact(1).gt.0.and.q2fact(2).gt.0.and.scale.gt.0) then + if(ickkw.le.0.and.(xqcut.le.0d0.or.init_mode).and.q2fact(1).gt.0.and.q2fact(2).gt.0.and.scale.gt.0) then if(use_syst)then s_scale(ivec)=scale n_qcd(ivec)=nqcd(iconfig) diff --git a/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py b/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py index 286913623c..842f43e67a 100755 --- a/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_tt.mad/bin/internal/banner.py @@ -3184,7 +3184,6 @@ def get_value_from_include(self, path, list_of_params, output_dir): if path does not exists return the current value in self for all parameter""" #WARNING DOES NOT HANDLE LIST/DICT so far - # handle case where file is missing if not os.path.exists(pjoin(output_dir,path)): misc.sprint("include file not existing", pjoin(output_dir,path)) @@ -3314,6 +3313,7 @@ def edit_dummy_fct_from_file(self, filelist, outdir): starttext = open(pjoin(outdir, path+'.orig')).read() fsock.remove_routine(starttext, to_mod[path][0]) for text in to_mod[path][1]: + text = self.retro_compatible_custom_fct(text) fsock.writelines(text) fsock.close() if not filecmp.cmp(pjoin(outdir, path), pjoin(outdir, path+'.tmp')): @@ -3330,7 +3330,33 @@ def edit_dummy_fct_from_file(self, filelist, outdir): files.mv(pjoin(outdir,path+'.orig'), pjoin(outdir, path)) + @staticmethod + def retro_compatible_custom_fct(lines, mode=None): + f77_type = ['real*8', 'integer', 'double precision', 'logical'] + function_pat = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ + % {'type':'|'.join(f77_type)}, re.I+re.M) + include_pat = re.compile(r"\s+include\s+[\'\"]([\w\./]*)") + + assert isinstance(lines, list) + sol = [] + + if mode is None or 'vector.inc' in mode: + search = True + for i,line in enumerate(lines[:]): + if search and re.search(include_pat, line): + name = re.findall(include_pat, line)[0] + misc.sprint('DETECTED INCLUDE', name) + if 'vector.inc' in name: + search = False + if 'run.inc' in name: + sol.append(" include 'vector.inc'") + search = False + sol.append(line) + if re.search(function_pat, line): + misc.sprint("DETECTED FCT") + search = True + return sol def guess_entry_fromname(self, name, value): """ @@ -3506,8 +3532,10 @@ def write_include_file(self, output_dir, output_file=None): #ensusre that system only parameter are correctly set self.update_system_parameter_for_include() - value_in_old_include = self.get_last_value_include(output_dir) - + if output_dir: #output_dir is set to None in some unittest + value_in_old_include = self.get_last_value_include(output_dir) + else: + value_in_old_include = {} if output_dir: self.write_autodef(output_dir, output_file=None) @@ -3524,7 +3552,6 @@ def write_include_file(self, output_dir, output_file=None): def write_one_include_file(self, output_dir, incname, output_file=None): """write one include file at the time""" - misc.sprint(incname) if incname is True: pathinc = self.default_include_file elif incname is False: diff --git a/epochX/cudacpp/gg_tt.mad/bin/internal/file_writers.py b/epochX/cudacpp/gg_tt.mad/bin/internal/file_writers.py index ac9c46e53e..526756129f 100755 --- a/epochX/cudacpp/gg_tt.mad/bin/internal/file_writers.py +++ b/epochX/cudacpp/gg_tt.mad/bin/internal/file_writers.py @@ -212,7 +212,7 @@ class FortranWriterError(FileWriter.FileWriterError): # Private variables __indent = 0 __keyword_list = [] - __comment_pattern = re.compile(r"^(\s*#|c\$|c$|(c\s+([^=]|$))|cf2py|c\-\-|c\*\*|!\$)", re.IGNORECASE) + 
__comment_pattern = re.compile(r"^(\s*#|c\$|c$|(c\s+([^=]|$))|cf2py|c\-\-|c\*\*|\s*!|!\$)", re.IGNORECASE) __continuation_line = re.compile(r"(?: )[$&]") def write_line(self, line): @@ -424,26 +424,20 @@ def count_number_of_quotes(self, line): i = i + 1 return len(splitline)-1 - def remove_routine(self, text, fct_names, formatting=True): - """write the incoming text but fully removing the associate routine/function - text can be a path to a file, an iterator, a string - fct_names should be a list of functions to remove + @staticmethod + def get_routine(text, fct_names, call_back=None): + """ + get the fortran function from a fortran file """ - f77_type = ['real*8', 'integer', 'double precision', 'logical'] pattern = re.compile(r'^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ % {'type':'|'.join(f77_type)}, re.I) - + + if isinstance(text, str): + text = text.split('\n') + + to_write=False removed = [] - if isinstance(text, str): - if '\n' in text: - text = text.split('\n') - else: - text = open(text) - if isinstance(fct_names, str): - fct_names = [fct_names] - - to_write=True for line in text: fct = pattern.findall(line) if fct: @@ -451,22 +445,38 @@ def remove_routine(self, text, fct_names, formatting=True): to_write = False else: to_write = True - if to_write: - if formatting: - if line.endswith('\n'): - line = line[:-1] - self.writelines(line) - else: - if not line.endswith('\n'): - line = '%s\n' % line - super(FileWriter,self).writelines(line) + if call_back: + call_back(line) else: removed.append(line) - + return removed + + def remove_routine(self, text, fct_names, formatting=True): + """write the incoming text but fully removing the associate routine/function + text can be a path to a file, an iterator, a string + fct_names should be a list of functions to remove + """ + + def call_back(line): + if formatting: + if line.endswith('\n'): + line = line[:-1] + self.writelines(line) + else: + if not line.endswith('\n'): + line = '%s\n' % line + super(FileWriter,self).writelines(line) + + return self.get_routine(text, fct_names, call_back) + +class FortranWriter90(FortranWriter): + + comment_char = ' !' + #=============================================================================== # CPPWriter #=============================================================================== diff --git a/epochX/cudacpp/gg_tt.mad/bin/internal/lhe_parser.py b/epochX/cudacpp/gg_tt.mad/bin/internal/lhe_parser.py index 693b19367f..f6e47956cd 100755 --- a/epochX/cudacpp/gg_tt.mad/bin/internal/lhe_parser.py +++ b/epochX/cudacpp/gg_tt.mad/bin/internal/lhe_parser.py @@ -1067,6 +1067,8 @@ def define_init_banner(self, wgt, lha_strategy, proc_charac=None): #special case for 1>N init_information = run_card.get_banner_init_information() event = next(self) + if not len(event): #if parse-momenta was false we have to parse the first event + event = Event(str(event)) init_information["idbmup1"] = event[0].pdg init_information["ebmup1"] = event[0].mass init_information["idbmup2"] = 0 diff --git a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt index 379d3e24b4..0ee963873f 100644 --- a/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt +++ b/epochX/cudacpp/gg_tt.sa/CODEGEN_cudacpp_gg_tt_log.txt @@ -14,7 +14,7 @@ Running MG5 in debug mode * * * * * * * * * * * * -* VERSION 3.5.3_lo_vect 2023-12-23 * +* VERSION 3.6.0_lo_vect 2024-06-17 * * * * WARNING: UNKNOWN DEVELOPMENT VERSION. 
* * WARNING: DO NOT USE FOR PRODUCTION * @@ -45,15 +45,10 @@ Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (inclu Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config -None does not seem to correspond to a valid lhapdf-config executable. -Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). -Note that you can still compile and run aMC@NLO with the built-in PDFs - MG5_aMC> set lhapdf /PATH/TO/lhapdf-config - Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +57,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005359172821044922  +DEBUG: model prefixing takes 0.005754947662353516  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -159,13 +154,12 @@ INFO: Process has 3 diagrams Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_tt Load PLUGIN.CUDACPP_OUTPUT -Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.5.3_lo_vect. +Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT -DEBUG: cformat =  plugin [export_cpp.py at line 3070]  DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 165]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 @@ -174,30 +168,30 @@ INFO: Processing color information for process: g g > t t~ @1 DEBUG: type(fortran_model)= [output.py at line 214]  DEBUG: type(me)= me=0 [output.py at line 215]  DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 216]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/. +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/SubProcesses/P1_Sigma_sm_gg_ttx/. 
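Back in the banner.py diff, the new retro_compatible_custom_fct staticmethod rewrites user-supplied custom Fortran routines written against older templates: run.inc now depends on VECSIZE_MEMMAX from vector.inc, so whenever a routine reaches an include of run.inc without having included vector.inc first, the missing include is injected. A trimmed sketch of that scan (the indentation of the injected line is illustrative):

    import re

    F77_TYPES = ['real*8', 'integer', 'double precision', 'logical']
    # note: 'real*8' is carried over verbatim from the original pattern;
    # as a regex the '*' quantifies the 'l', so a literal 'real*8 function'
    # would not match. That quirk is in the source, not added here.
    FUNCTION_PAT = re.compile(
        r'^\s+(?:SUBROUTINE|(?:%s)\s+function)\s+([a-zA-Z]\w*)'
        % '|'.join(F77_TYPES), re.I | re.M)
    INCLUDE_PAT = re.compile(r"\s+include\s+['\"]([\w\./]*)")

    def inject_vector_inc(lines):
        out = []
        search = True                  # still looking within this routine?
        for line in lines:
            m = INCLUDE_PAT.search(line)
            if search and m:
                name = m.group(1)
                if 'vector.inc' in name:
                    search = False     # already present, nothing to do
                elif 'run.inc' in name:
                    out.append("      include 'vector.inc'")
                    search = False
            out.append(line)
            if FUNCTION_PAT.search(line):
                search = True          # a new routine restarts the scan
        return out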
Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.145 s +ALOHA: aloha creates 2 routines in 0.147 s VVV1 FFV1 FFV1 FFV1 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_tt/src/. quit -real 0m0.557s -user 0m0.478s -sys 0m0.057s +real 0m0.547s +user 0m0.488s +sys 0m0.047s Code generation completed in 0 seconds diff --git a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt index c5e2f4d9fe..b05f9d17ae 100644 --- a/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt +++ b/epochX/cudacpp/gg_tt01g.mad/CODEGEN_mad_gg_tt01g_log.txt @@ -14,7 +14,7 @@ Running MG5 in debug mode * * * * * * * * * * * * -* VERSION 3.5.3_lo_vect 2023-12-23 * +* VERSION 3.6.0_lo_vect 2024-06-17 * * * * WARNING: UNKNOWN DEVELOPMENT VERSION. * * WARNING: DO NOT USE FOR PRODUCTION * @@ -45,15 +45,10 @@ Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (inclu Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config -None does not seem to correspond to a valid lhapdf-config executable. -Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). -Note that you can still compile and run aMC@NLO with the built-in PDFs - MG5_aMC> set lhapdf /PATH/TO/lhapdf-config - Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +57,7 @@ generate g g > t t~ No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005418300628662109  +DEBUG: model prefixing takes 0.005604743957519531  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,7 +150,7 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.009 s +1 processes with 3 diagrams generated in 0.008 s Total: 1 processes with 3 diagrams add process g g > t t~ g INFO: Checking for minimal orders which gives processes. @@ -163,24 +158,24 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @2 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.021 s +1 processes with 16 diagrams generated in 0.020 s Total: 2 processes with 19 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_tt01g --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT -Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.5.3_lo_vect. +Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT +DEBUG: opt['output_options']['vector_size'] =  32 [export_v4.py at line 4334]  Output will be done with PLUGIN: CUDACPP_OUTPUT -DEBUG: cformat =  standalone_simd [export_cpp.py at line 3070]  DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 165]  INFO: initialize a new directory: CODEGEN_mad_gg_tt01g INFO: remove old information in CODEGEN_mad_gg_tt01g DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @2 INFO: Processing color information for process: g g > t t~ g @2 @@ -192,13 +187,8 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  -DEBUG: subproc_number =  0 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxg -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P2_gg_ttxg [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  15 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1548]  @@ -208,32 +198,28 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
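The file_writers.py hunk earlier in this patch is a classic extract-and-delegate refactor: the body of remove_routine becomes a reusable static scanner, get_routine, which collects the lines of the named routines and forwards every other line to an optional call_back; remove_routine then merely supplies a call_back that writes the surviving lines. Reduced sketch (type alternation inlined and escaped; the original starts with to_write=False, so anything before the first routine header counts as removable):

    import re

    ROUTINE_PAT = re.compile(
        r'^\s+(?:SUBROUTINE|(?:real\*8|integer|double precision|logical)'
        r'\s+function)\s+([a-zA-Z]\w*)', re.I)

    def get_routine(text, fct_names, call_back=None):
        # Collect the named Fortran routines; forward the other lines.
        if isinstance(text, str):
            text = text.split('\n')
        if isinstance(fct_names, str):
            fct_names = [fct_names]
        removed, to_write = [], False
        for line in text:
            fct = ROUTINE_PAT.findall(line)
            if fct:
                # a header decides the fate of its whole block
                to_write = fct[0].lower() not in fct_names
            if to_write:
                if call_back:
                    call_back(line)
            else:
                removed.append(line)
        return removed

    def remove_routine(text, fct_names, write_line):
        # Keep everything except the named routines, as the new
        # FortranWriter.remove_routine does (formatting details omitted).
        return get_routine(text, fct_names, call_back=write_line)

For example, remove_routine(src, ['dummy_fct'], kept.append) fills kept with the file minus dummy_fct and returns the removed lines.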
-DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  -DEBUG: subproc_number =  1 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P1_gg_ttx [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  3 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1548]  -Generated helas calls for 2 subprocesses (19 diagrams) in 0.044 s -Wrote files for 46 helas calls in 0.208 s +Generated helas calls for 2 subprocesses (19 diagrams) in 0.043 s +Wrote files for 46 helas calls in 0.192 s +DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.328 s +ALOHA: aloha creates 5 routines in 0.332 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.317 s +ALOHA: aloha creates 10 routines in 0.318 s VVV1 VVV1 FFV1 @@ -243,41 +229,47 @@ ALOHA: aloha creates 10 routines in 0.317 s VVVV1 VVVV3 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. 
If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common -patching file Source/genps.inc +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file SubProcesses/makefile -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P2_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #2 succeeded at 227 (offset 13 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/SubProcesses/P2_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -Hunk #2 succeeded at 249 (offset 16 lines). +Hunk #2 succeeded at 243 (offset 29 lines). DEBUG: p.returncode =  0 [output.py at line 258]  -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/README +/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/README Run "open index.html" to see more information about this process. quit -real 0m2.670s -user 0m2.325s -sys 0m0.289s +real 0m2.620s +user 0m2.318s +sys 0m0.298s Code generation completed in 3 seconds ************************************************************ * * @@ -291,7 +283,7 @@ Code generation completed in 3 seconds * * * * * * * * * * * * -* VERSION 3.5.3_lo_vect * +* VERSION 3.6.0_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * @@ -299,9 +291,9 @@ Code generation completed in 3 seconds * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt @@ -321,7 +313,7 @@ launch in debug mode * * * * * * * * * * * * -* VERSION 3.5.3_lo_vect * +* VERSION 3.6.0_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * @@ -329,9 +321,9 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_tt01g/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt diff --git a/epochX/cudacpp/gg_tt01g.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_tt01g.mad/Cards/me5_configuration.txt index cdeedc7863..4f5079f78a 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_tt01g.mad/Cards/me5_configuration.txt @@ -116,6 +116,7 @@ # cluster_type = condor # cluster_queue = madgraph # cluster_size = 150 +# cluster_walltime = # time in minute for slurm and second for condor (not supported for other scheduller) #! Path to a node directory to avoid direct writing on the central disk #! 
Note that condor clusters avoid direct writing by default (therefore this @@ -234,7 +235,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gg_tt01g.mad/Cards/param_card.dat b/epochX/cudacpp/gg_tt01g.mad/Cards/param_card.dat index caf4a67ea8..bb150ef10d 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Cards/param_card.dat +++ b/epochX/cudacpp/gg_tt01g.mad/Cards/param_card.dat @@ -1,5 +1,5 @@ ###################################################################### -## PARAM_CARD AUTOMATICALY GENERATED BY MG5 FOLLOWING UFO MODEL #### +## PARAM_CARD AUTOMATICALLY GENERATED BY MG5 FOLLOWING UFO MODEL #### ###################################################################### ## ## ## Width set on Auto will be computed following the information ## diff --git a/epochX/cudacpp/gg_tt01g.mad/Cards/param_card_default.dat b/epochX/cudacpp/gg_tt01g.mad/Cards/param_card_default.dat index caf4a67ea8..bb150ef10d 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Cards/param_card_default.dat +++ b/epochX/cudacpp/gg_tt01g.mad/Cards/param_card_default.dat @@ -1,5 +1,5 @@ ###################################################################### -## PARAM_CARD AUTOMATICALY GENERATED BY MG5 FOLLOWING UFO MODEL #### +## PARAM_CARD AUTOMATICALLY GENERATED BY MG5 FOLLOWING UFO MODEL #### ###################################################################### ## ## ## Width set on Auto will be computed following the information ## diff --git a/epochX/cudacpp/gg_tt01g.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_tt01g.mad/Cards/proc_card_mg5.dat index 1b2fc5f0b6..862c94ebe6 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_tt01g.mad/Cards/proc_card_mg5.dat @@ -8,7 +8,7 @@ #* * * * #* * #* * -#* VERSION 3.5.3_lo_vect 2023-12-23 * +#* VERSION 3.6.0_lo_vect 2024-06-17 * #* * #* WARNING: UNKNOWN DEVELOPMENT VERSION. * #* WARNING: DO NOT USE FOR PRODUCTION * diff --git a/epochX/cudacpp/gg_tt01g.mad/Cards/reweight_card_default.dat b/epochX/cudacpp/gg_tt01g.mad/Cards/reweight_card_default.dat index ace534ae02..5cc65fe095 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Cards/reweight_card_default.dat +++ b/epochX/cudacpp/gg_tt01g.mad/Cards/reweight_card_default.dat @@ -19,8 +19,16 @@ change mode NLO # Define type of Reweighting. For LO sample this command # has no effect since only "LO" mode is allowed. + +#uncomment if you do not want to overwrite the reweight file of Sudakov in rw_me +#change rwgt_dir /PATH_MG5_BRANCH/NAME_FOLDER + +#uncomment if you want to use Sudakov Reweight +#change include_sudakov True + launch + # SPECIFY A PATH OR USE THE SET COMMAND LIKE THIS: # set sminputs 1 130 # modify 1/alpha_EW diff --git a/epochX/cudacpp/gg_tt01g.mad/Cards/run_card.dat b/epochX/cudacpp/gg_tt01g.mad/Cards/run_card.dat index 00b4e5f92c..1711d30371 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Cards/run_card.dat +++ b/epochX/cudacpp/gg_tt01g.mad/Cards/run_card.dat @@ -178,6 +178,7 @@ systematics = systematics_program ! none, systematics [python], SysCalc [depreceted, C++] ['--mur=0.5,1,2', '--muf=0.5,1,2', '--pdf=errorset', '--alps=0.5,1,2'] = systematics_arguments ! 
see: https://cp3.irmp.ucl.ac.be/projects/madgraph/wiki/Systematics#Systematicspythonmodule + #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ diff --git a/epochX/cudacpp/gg_tt01g.mad/Cards/run_card_default.dat b/epochX/cudacpp/gg_tt01g.mad/Cards/run_card_default.dat index a94a9d3a15..07d42df6c1 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Cards/run_card_default.dat +++ b/epochX/cudacpp/gg_tt01g.mad/Cards/run_card_default.dat @@ -178,6 +178,7 @@ systematics = systematics_program ! none, systematics [python], SysCalc [depreceted, C++] ['--mur=0.5,1,2', '--muf=0.5,1,2', '--pdf=errorset', '--alps=0.5,1,2'] = systematics_arguments ! see: https://cp3.irmp.ucl.ac.be/projects/madgraph/wiki/Systematics#Systematicspythonmodule + #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ diff --git a/epochX/cudacpp/gg_tt01g.mad/MGMEVersion.txt b/epochX/cudacpp/gg_tt01g.mad/MGMEVersion.txt index 9d3a5c0ba0..a806d3938c 100644 --- a/epochX/cudacpp/gg_tt01g.mad/MGMEVersion.txt +++ b/epochX/cudacpp/gg_tt01g.mad/MGMEVersion.txt @@ -1 +1 @@ -3.5.3_lo_vect \ No newline at end of file +3.6.0_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/gg_tt01g.mad/Source/DHELAS/aloha_functions.f b/epochX/cudacpp/gg_tt01g.mad/Source/DHELAS/aloha_functions.f index 975725737f..47699fa614 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Source/DHELAS/aloha_functions.f +++ b/epochX/cudacpp/gg_tt01g.mad/Source/DHELAS/aloha_functions.f @@ -351,7 +351,7 @@ subroutine oxxxxx(p,fmass,nhel,nsf , fo) fo(6) = ip * sqm(abs(im)) else - pp = min(p(0),dsqrt(p(1)**2+p(2)**2+p(3)**2)) +c pp = min(p(0),dsqrt(p(1)**2+p(2)**2+p(3)**2)) sf(1) = dble(1+nsf+(1-nsf)*nh)*rHalf sf(2) = dble(1+nsf-(1-nsf)*nh)*rHalf omega(1) = dsqrt(p(0)+pp) @@ -2054,7 +2054,7 @@ subroutine CombineAmp(nb, ihels, iwfcts, W1, Wall, Amp) enddo return end - + subroutine CombineAmpS(nb, ihels, iwfcts, W1, Wall, Amp) integer nb ! size of the vectors diff --git a/epochX/cudacpp/gg_tt01g.mad/Source/MODEL/couplings.f b/epochX/cudacpp/gg_tt01g.mad/Source/MODEL/couplings.f index f3b620ab58..04d6bb5333 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Source/MODEL/couplings.f +++ b/epochX/cudacpp/gg_tt01g.mad/Source/MODEL/couplings.f @@ -10,18 +10,27 @@ SUBROUTINE COUP() PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + LOGICAL UPDATELOOP COMMON /TO_UPDATELOOP/UPDATELOOP INCLUDE 'input.inc' - INCLUDE '../vector.inc' + + INCLUDE 'coupl.inc' READLHA = .TRUE. 
INCLUDE 'intparam_definition.inc' CALL COUP1() + IF (UPDATELOOP) THEN + + CALL COUP2() + + ENDIF + C couplings needed to be evaluated points by points C - CALL COUP2(1) + CALL COUP3(1) RETURN END @@ -47,7 +56,11 @@ SUBROUTINE UPDATE_AS_PARAM(VECID) INCLUDE '../maxparticles.inc' INCLUDE '../cuts.inc' + + INCLUDE '../vector.inc' + + INCLUDE '../run.inc' DOUBLE PRECISION ALPHAS @@ -65,7 +78,7 @@ SUBROUTINE UPDATE_AS_PARAM(VECID) couplings needed to be evaluated points by points C ALL_G(VECID) = G - CALL COUP2(VECID) + CALL COUP3(VECID) RETURN END @@ -80,7 +93,9 @@ SUBROUTINE UPDATE_AS_PARAM2(MU_R2,AS2 ,VECID) INTEGER VECID INCLUDE 'model_functions.inc' INCLUDE 'input.inc' + INCLUDE '../vector.inc' + INCLUDE 'coupl.inc' DOUBLE PRECISION MODEL_SCALE COMMON /MODEL_SCALE/MODEL_SCALE diff --git a/epochX/cudacpp/gg_tt01g.mad/Source/MODEL/couplings1.f b/epochX/cudacpp/gg_tt01g.mad/Source/MODEL/couplings1.f index e14f3a1770..72cfa0f6e4 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Source/MODEL/couplings1.f +++ b/epochX/cudacpp/gg_tt01g.mad/Source/MODEL/couplings1.f @@ -7,11 +7,12 @@ SUBROUTINE COUP1( ) IMPLICIT NONE INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' END diff --git a/epochX/cudacpp/gg_tt01g.mad/Source/MODEL/couplings2.f b/epochX/cudacpp/gg_tt01g.mad/Source/MODEL/couplings2.f index e638b28035..30f3a04e3b 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Source/MODEL/couplings2.f +++ b/epochX/cudacpp/gg_tt01g.mad/Source/MODEL/couplings2.f @@ -2,19 +2,17 @@ c written by the UFO converter ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc - SUBROUTINE COUP2( VECID) + SUBROUTINE COUP2( ) IMPLICIT NONE - INTEGER VECID + INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' - GC_10(VECID) = -G - GC_11(VECID) = MDL_COMPLEXI*G - GC_12(VECID) = MDL_COMPLEXI*MDL_G__EXP__2 END diff --git a/epochX/cudacpp/gg_tt01g.mad/Source/MODEL/couplings3.f b/epochX/cudacpp/gg_tt01g.mad/Source/MODEL/couplings3.f index f537dd3764..ad696f2865 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Source/MODEL/couplings3.f +++ b/epochX/cudacpp/gg_tt01g.mad/Source/MODEL/couplings3.f @@ -7,12 +7,13 @@ SUBROUTINE COUP3( VECID) IMPLICIT NONE INTEGER VECID INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' GC_10(VECID) = -G GC_11(VECID) = MDL_COMPLEXI*G diff --git a/epochX/cudacpp/gg_tt01g.mad/Source/MODEL/makefile b/epochX/cudacpp/gg_tt01g.mad/Source/MODEL/makefile index 0b24445a53..5a5681d220 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Source/MODEL/makefile +++ b/epochX/cudacpp/gg_tt01g.mad/Source/MODEL/makefile @@ -3,7 +3,7 @@ # Makefile for model library # # ---------------------------------------------------------------------------- - +# template models/template_files/makefile_madevent # Check for ../make_opts ifeq ($(wildcard ../make_opts), ../make_opts) include ../make_opts diff --git a/epochX/cudacpp/gg_tt01g.mad/Source/MODEL/makeinc.inc b/epochX/cudacpp/gg_tt01g.mad/Source/MODEL/makeinc.inc index 6e2743eac1..4a9e1b2cc5 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Source/MODEL/makeinc.inc +++ b/epochX/cudacpp/gg_tt01g.mad/Source/MODEL/makeinc.inc @@ -2,4 +2,4 @@ # 
written by the UFO converter ############################################################################# -MODEL = couplings.o lha_read.o printout.o rw_para.o model_functions.o couplings1.o couplings2.o \ No newline at end of file +MODEL = couplings.o lha_read.o printout.o rw_para.o model_functions.o couplings1.o couplings2.o couplings3.o \ No newline at end of file diff --git a/epochX/cudacpp/gg_tt01g.mad/Source/MODEL/printout.f b/epochX/cudacpp/gg_tt01g.mad/Source/MODEL/printout.f index 18b8f35b08..3e195b03a2 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Source/MODEL/printout.f +++ b/epochX/cudacpp/gg_tt01g.mad/Source/MODEL/printout.f @@ -1,3 +1,7 @@ +ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc +c written by the UFO converter +ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc + c************************************************************************ c** ** c** MadGraph/MadEvent Interface to FeynRules ** @@ -9,7 +13,7 @@ subroutine printout implicit none - include '../vector.inc' ! defines VECSIZE_MEMMAX + include '../vector.inc' ! VECSIZE_MEMMAX (needed by coupl.inc) include 'coupl.inc' ! needs VECSIZE_MEMMAX (defined in vector.inc) include 'input.inc' diff --git a/epochX/cudacpp/gg_tt01g.mad/Source/PDF/eepdf.inc b/epochX/cudacpp/gg_tt01g.mad/Source/PDF/eepdf.inc index a0183e49ee..d50d8c62b3 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Source/PDF/eepdf.inc +++ b/epochX/cudacpp/gg_tt01g.mad/Source/PDF/eepdf.inc @@ -4,6 +4,9 @@ integer n_ee parameter (n_ee = 4) ! arrays to store the components before combining them + ! note this common is very quickly overwritten do not use + ! use such common outside of auto_dsig.f double precision ee_components(n_ee) common / to_ee_components / ee_components + diff --git a/epochX/cudacpp/gg_tt01g.mad/Source/PDF/pdfwrap_emela.f b/epochX/cudacpp/gg_tt01g.mad/Source/PDF/pdfwrap_emela.f index bce10819d5..da1e4e2276 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Source/PDF/pdfwrap_emela.f +++ b/epochX/cudacpp/gg_tt01g.mad/Source/PDF/pdfwrap_emela.f @@ -13,7 +13,7 @@ SUBROUTINE PDFWRAP DOUBLE PRECISION VALUE(20) REAL*8 ALPHASPDF EXTERNAL ALPHASPDF - ! PDFs with beamstrahlung use specific initialisation/evaluation +C PDFs with beamstrahlung use specific initialisation/evaluation LOGICAL HAS_BSTRAHL COMMON /TO_HAS_BS/ HAS_BSTRAHL diff --git a/epochX/cudacpp/gg_tt01g.mad/Source/PDF/pdg2pdf.f b/epochX/cudacpp/gg_tt01g.mad/Source/PDF/pdg2pdf.f index 46f321e66b..2e7343a69b 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Source/PDF/pdg2pdf.f +++ b/epochX/cudacpp/gg_tt01g.mad/Source/PDF/pdg2pdf.f @@ -108,7 +108,7 @@ double precision function pdg2pdf(ih,ipdg,beamid,x,xmu) else if (abs(ipart).eq.10) then ipart = sign(1,ipart) * 15 endif - pdg2pdf = 0d0 + pdg2pdf = 0d0 if (beamid.lt.0) then ih_local = ipart @@ -122,7 +122,7 @@ double precision function pdg2pdf(ih,ipdg,beamid,x,xmu) endif do i_ee = 1, n_ee ee_components(i_ee) = compute_eepdf(x,omx_ee(iabs(beamid)),xmu,i_ee,ipart,ih_local) - enddo + enddo pdg2pdf = ee_components(1) ! 
temporary to test pdf load c write(*,*), x, beamid ,omx_ee(iabs(beamid)),xmu,1,ipart,ih_local,pdg2pdf return diff --git a/epochX/cudacpp/gg_tt01g.mad/Source/dsample.f b/epochX/cudacpp/gg_tt01g.mad/Source/dsample.f index 09d482b45a..b0bc0547ad 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Source/dsample.f +++ b/epochX/cudacpp/gg_tt01g.mad/Source/dsample.f @@ -204,6 +204,9 @@ subroutine sample_full(ndim,ncall,itmax,itmin,dsig,ninvar,nconfigs,VECSIZE_USED) if (VECSIZE_USED.le.1) then all_fx(1) = dsig(all_p, all_wgt,0) + ivec=0 + ilock=0 + iwarp=1 else c Here "i" is the position in the full grid of the event do i=(iwarp-1)*WARP_SIZE+1, iwarp*warp_size diff --git a/epochX/cudacpp/gg_tt01g.mad/Source/eepdf.inc b/epochX/cudacpp/gg_tt01g.mad/Source/eepdf.inc index a0183e49ee..d50d8c62b3 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Source/eepdf.inc +++ b/epochX/cudacpp/gg_tt01g.mad/Source/eepdf.inc @@ -4,6 +4,9 @@ integer n_ee parameter (n_ee = 4) ! arrays to store the components before combining them + ! note this common is very quickly overwritten do not use + ! use such common outside of auto_dsig.f double precision ee_components(n_ee) common / to_ee_components / ee_components + diff --git a/epochX/cudacpp/gg_tt01g.mad/Source/genps.inc b/epochX/cudacpp/gg_tt01g.mad/Source/genps.inc index af7e0efbce..ef78e7e812 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Source/genps.inc +++ b/epochX/cudacpp/gg_tt01g.mad/Source/genps.inc @@ -29,9 +29,8 @@ c parameter (max_host=99,maxplace=199,maxpoints=100,maxans=50) c************************************************************************* c Parameters for helicity sums in matrixN.f c************************************************************************* - REAL*8 LIMHEL -c PARAMETER(LIMHEL=1e-8) ! ME threshold for helicity filtering (Fortran default) - PARAMETER(LIMHEL=0) ! ME threshold for helicity filtering (force Fortran to mimic cudacpp, see #419) +c REAL*8 LIMHEL +c PARAMETER(LIMHEL=1e-8) -> pass in the run_card.dat INTEGER MAXTRIES PARAMETER(MAXTRIES=25) C To pass the helicity configuration chosen by the DiscreteSampler to diff --git a/epochX/cudacpp/gg_tt01g.mad/Source/run.inc b/epochX/cudacpp/gg_tt01g.mad/Source/run.inc index 5433a23583..81c8df6bf2 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Source/run.inc +++ b/epochX/cudacpp/gg_tt01g.mad/Source/run.inc @@ -106,4 +106,7 @@ c double precision tmin_for_channel integer sde_strat ! 
1 means standard single diagram enhancement strategy, c 2 means approximation by the denominator of the propagator - common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat \ No newline at end of file + common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat +c + double precision limhel + common/to_limhel/limhel diff --git a/epochX/cudacpp/gg_tt01g.mad/Source/run_card.inc b/epochX/cudacpp/gg_tt01g.mad/Source/run_card.inc index 22d8b7aaa9..2588190439 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Source/run_card.inc +++ b/epochX/cudacpp/gg_tt01g.mad/Source/run_card.inc @@ -88,6 +88,8 @@ DSQRT_SHAT = 0.000000000000000D+00 + LIMHEL = 0.000000000000000D+00 + PTJ = 2.000000000000000D+01 PTB = 0.000000000000000D+00 diff --git a/epochX/cudacpp/gg_tt01g.mad/Source/setrun.f b/epochX/cudacpp/gg_tt01g.mad/Source/setrun.f index 9e9ef7fdbd..dc6caef748 100644 --- a/epochX/cudacpp/gg_tt01g.mad/Source/setrun.f +++ b/epochX/cudacpp/gg_tt01g.mad/Source/setrun.f @@ -162,9 +162,21 @@ subroutine setrun C Fill common block for Les Houches init info do i=1,2 if(lpp(i).eq.1.or.lpp(i).eq.2) then - idbmup(i)=2212 + if (nb_proton(i).eq.1.and.nb_neutron(i).eq.0) then + idbmup(i)=2212 + elseif (nb_proton(i).eq.0.and.nb_neutron(i).eq.1) then + idbmup(i)=2112 + else + idbmup(i) = 1000000000 + (nb_proton(i)+nb_neutron(i))*10 + $ + nb_proton(i)*10000 + endif elseif(lpp(i).eq.-1.or.lpp(i).eq.-2) then - idbmup(i)=-2212 + if (nb_proton(i).eq.1.and.nb_neutron(i).eq.0) then + idbmup(i)=-2212 + else + idbmup(i) = -1*(1000000000 + (nb_proton(i)+nb_neutron(i))*10 + $ + nb_proton(i)*10000) + endif elseif(lpp(i).eq.3) then idbmup(i)=11 elseif(lpp(i).eq.-3) then diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MGVersion.txt b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MGVersion.txt index 9d3a5c0ba0..a806d3938c 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MGVersion.txt +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/MGVersion.txt @@ -1 +1 @@ -3.5.3_lo_vect \ No newline at end of file +3.6.0_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc index 4942958c59..cf0c87de10 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.h b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.h index d11c697a5f..132108c2fa 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.h +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 
3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig.f index ef6cba5d03..ff10374363 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1134,11 +1134,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=16) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1148,32 +1149,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=16) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=16) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f index e00e2f9e40..51cde595e2 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -101,6 +101,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -217,7 +219,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -281,7 +283,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -320,9 +322,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -338,6 +341,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -485,11 +490,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -589,9 +589,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -704,3 +706,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/driver.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/driver.f index 27a6e46742..ec5722702a 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/driver.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f index 710fd12977..2233869649 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P1_gg_ttx/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -23,6 +23,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=16) INTEGER NGRAPHS @@ -46,8 +48,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -56,26 +58,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -86,7 +85,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -96,26 +94,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,4) /-1,-1,-1, 1/ DATA (NHEL(I, 2),I=1,4) /-1,-1,-1,-1/ DATA (NHEL(I, 3),I=1,4) /-1,-1, 1, 1/ @@ -143,8 +140,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -153,11 +149,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=2 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=2 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -167,12 +163,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) @@ -184,7 +179,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -213,35 +209,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -313,7 +307,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc index be182f91bc..ff58257c0e 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.h b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.h index e94f9acfb2..b1a51cefe9 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.h +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig.f index 6a15061a04..8bb6a95aa0 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1134,11 +1134,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=32) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1148,32 +1149,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=32) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=32) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f index d9d48565b5..608ec15cda 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -101,6 +101,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -217,7 +219,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -281,7 +283,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! 
random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -320,9 +322,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -338,6 +341,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -485,11 +490,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -589,9 +589,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -720,3 +722,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/driver.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/driver.f index c45686a3b2..c2eadb2c31 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/driver.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f index 0c713c8fcf..b695473cac 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/P2_gg_ttxg/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -23,6 +23,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=32) INTEGER NGRAPHS @@ -46,8 +48,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -56,26 +58,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -86,7 +85,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -96,26 +94,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,5) /-1,-1,-1, 1,-1/ DATA (NHEL(I, 2),I=1,5) /-1,-1,-1, 1, 1/ DATA (NHEL(I, 3),I=1,5) /-1,-1,-1,-1,-1/ @@ -159,8 +156,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -169,11 +165,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=6 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=6 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -183,12 +179,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) @@ -200,7 +195,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -229,35 +225,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -329,7 +323,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cluster.f b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cluster.f index 649e46f4e9..b8995283ed 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cluster.f +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/cluster.f @@ -552,6 +552,8 @@ logical function cluster(p, ivec) if (btest(mlevel,1)) $ write (*,*)'New event' + iwin = 0 + jwin = 0 cluster=.false. clustered=.false. do i=0,3 @@ -663,7 +665,8 @@ logical function cluster(p, ivec) c initialize graph storage igraphs(0)=0 nleft=nexternal -c cluster +c cluster + if (iwin.eq.0.or.jwin.eq.0) stop 21 do n=1,nexternal-2 c combine winner imocl(n)=imap(iwin,2)+imap(jwin,2) diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/proc_characteristics b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/proc_characteristics index 6711fb7544..633ee016af 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/proc_characteristics +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/proc_characteristics @@ -17,6 +17,8 @@ splitting_types = [] perturbation_order = [] limitations = [] + ew_sudakov = False hel_recycling = False single_color = True nlo_mixed_expansion = True + gauge = unitary diff --git a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/refine.sh b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/refine.sh index afb9b99ad1..b46170ba23 100644 --- a/epochX/cudacpp/gg_tt01g.mad/SubProcesses/refine.sh +++ b/epochX/cudacpp/gg_tt01g.mad/SubProcesses/refine.sh @@ -57,7 +57,11 @@ j=%(directory)s for((try=1;try<=16;try+=1)); do if [ "$keeplog" = true ] ; then + if [[ -e ../madevent ]];then ../madevent 2>&1 >> $k <input_app.txt &1 >> $k &1 >> log.txt &1 >> log.txt &1 >> $k <input_app.txt diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/banner.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/banner.py --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/banner.py - pat_begin=re.compile('<(?P<name>\w*)>') - pat_end=re.compile('</(?P<name>\w*)>') + pat_begin=re.compile(r'<(?P<name>\w*)>') + pat_end=re.compile(r'</(?P<name>\w*)>') tag_to_file={'slha':'param_card.dat', 'mgruncard':'run_card.dat', @@ -319,7 +319,7 @@ def check_pid(self, pid2label): def get_lha_strategy(self): """get the lha_strategy: how the weight have to be handle by the shower""" - if not self["init"]: + if "init" not in self or not self["init"]: raise Exception("No init block define") data = self["init"].split('\n')[0].split() @@ -537,7 +537,8 @@ def charge_card(self, tag): self.param_card = param_card_reader.ParamCard(param_card) return self.param_card elif tag == 'mgruncard': - self.run_card = RunCard(self[tag], unknown_warning=False) + with misc.TMP_variable(RunCard, 'allow_scan', True): + self.run_card = RunCard(self[tag], consistency=False, unknow_warning=False) return
self.run_card elif tag == 'mg5proccard': proc_card = self[tag].split('\n') @@ -976,6 +977,8 @@ class ConfigFile(dict): """ a class for storing/dealing with input file. """ + allow_scan = False + def __init__(self, finput=None, **opt): """initialize a new instance. input can be an instance of MadLoopParam, a file, a path to a file, or simply Nothing""" @@ -993,6 +996,7 @@ def __init__(self, finput=None, **opt): # Initialize it with all the default value self.user_set = set() self.auto_set = set() + self.scan_set = {} #key -> type of list (int/float/bool/str/... for scan self.system_only = set() self.lower_to_case = {} self.list_parameter = {} #key -> type of list (int/float/bool/str/... @@ -1109,6 +1113,8 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): #1. check if the parameter is set to auto -> pass it to special if lower_name in self: targettype = type(dict.__getitem__(self, lower_name)) + if lower_name in self.scan_set: + targettype = self.scan_set[lower_name] if targettype != str and isinstance(value, str) and value.lower() == 'auto': self.auto_set.add(lower_name) if lower_name in self.user_set: @@ -1118,13 +1124,26 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): return elif lower_name in self.auto_set: self.auto_set.remove(lower_name) - + + + #1. check if the parameter is set to auto -> pass it to special + scan_targettype = None + if self.allow_scan and isinstance(value, str) and value.strip().startswith('scan'): + if lower_name in self.user_set: + self.user_set.remove(lower_name) + self.scan_set[lower_name] = type(self[lower_name]) + dict.__setitem__(self, lower_name, value) + #keep old value. + self.post_set(lower_name,value, change_userdefine, raiseerror) + return + elif lower_name in self.scan_set: + scan_targettype = self.scan_set[lower_name] + del self.scan_set[lower_name] + # 2. Find the type of the attribute that we want if lower_name in self.list_parameter: targettype = self.list_parameter[lower_name] - - if isinstance(value, str): # split for each comma/space value = value.strip() @@ -1248,8 +1267,11 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): if change_userdefine: self.user_set.add(lower_name) return self.post_set(lower_name, None, change_userdefine, raiseerror) - elif name in self: - targettype = type(self[name]) + elif name in self: + if scan_targettype: + targettype = targettype + else: + targettype = type(self[name]) else: logger.debug('Trying to add argument %s in %s. ' % (name, self.__class__.__name__) +\ 'This argument is not defined by default. 
@@ -1248,8 +1267,11 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False):
                     if change_userdefine:
                         self.user_set.add(lower_name)
                     return self.post_set(lower_name, None, change_userdefine, raiseerror)
-        elif name in self:
-            targettype = type(self[name])
+        elif name in self:
+            if scan_targettype:
+                targettype = targettype
+            else:
+                targettype = type(self[name])
         else:
             logger.debug('Trying to add argument %s in %s. ' % (name, self.__class__.__name__) +\
                          'This argument is not defined by default. Please consider adding it.')
@@ -1262,7 +1284,7 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False):
             if change_userdefine:
                 self.user_set.add(lower_name)
             return self.post_set(lower_name, None, change_userdefine, raiseerror)
-
+
         value = self.format_variable(value, targettype, name=name)
         #check that the value is allowed:
         if lower_name in self.allowed_value and '*' not in self.allowed_value[lower_name]:
@@ -1444,7 +1466,7 @@ def format_variable(value, targettype, name="unknown"):
                     value =int(value[:-1]) * convert[value[-1]]
                 elif '/' in value or '*' in value:
                     try:
-                        split = re.split('(\*|/)',value)
+                        split = re.split(r'(\*|/)',value)
                         v = float(split[0])
                         for i in range((len(split)//2)):
                             if split[2*i+1] == '*':
@@ -1482,7 +1504,7 @@ def format_variable(value, targettype, name="unknown"):
                     value = float(value)
                 except ValueError:
                     try:
-                        split = re.split('(\*|/)',value)
+                        split = re.split(r'(\*|/)',value)
                         v = float(split[0])
                         for i in range((len(split)//2)):
                             if split[2*i+1] == '*':
@@ -1491,7 +1513,10 @@ def format_variable(value, targettype, name="unknown"):
                                 v /= float(split[2*i+2])
                     except:
                         v=0
-                        raise InvalidCmd("%s can not be mapped to a float" % value)
+                        if "scan" in value:
+                            raise InvalidCmd("%s is not supported here. Note that scan command can not be present simultaneously in the run_card and param_card." % value)
+                        else:
+                            raise InvalidCmd("%s can not be mapped to a float" % value)
                     finally:
                         value = v
                 else:
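The re.split branch above evaluates simple '*'/'/' arithmetic in card values; a compact stand-alone rendering of that parsing (function name invented for illustration):

    import re

    def to_float(value):
        split = re.split(r'(\*|/)', value)   # '2*3.5/7' -> ['2','*','3.5','/','7']
        v = float(split[0])
        for i in range(len(split) // 2):
            v = v * float(split[2*i+2]) if split[2*i+1] == '*' else v / float(split[2*i+2])
        return v

    print(to_float('2*3.5/7'))   # 1.0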
@@ -1737,10 +1762,12 @@ def default_setup(self):
         self.add_param('splitting_types',[], typelist=str)
         self.add_param('perturbation_order', [], typelist=str)
         self.add_param('limitations', [], typelist=str)
+        self.add_param('ew_sudakov', False)
         self.add_param('hel_recycling', False)
         self.add_param('single_color', True)
         self.add_param('nlo_mixed_expansion', True)
-
+        self.add_param('gauge', 'U')
+
     def read(self, finput):
         """Read the input file, this can be a path to a file,
            a file object, a str with the content of the file."""
@@ -3104,7 +3131,7 @@ def write(self, output_file, template=None, python_template=False,
             # do not write hidden parameter not hidden for this template
             #
             if python_template:
-                written = written.union(set(re.findall('\%\((\w*)\)s', open(template,'r').read(), re.M)))
+                written = written.union(set(re.findall(r'\%\((\w*)\)s', open(template,'r').read(), re.M)))
             to_write = to_write.union(set(self.hidden_param))
             to_write = to_write.difference(written)
@@ -3157,7 +3184,6 @@ def get_value_from_include(self, path, list_of_params, output_dir):
            if path does not exists return the current value in self for all parameter"""
 
         #WARNING DOES NOT HANDLE LIST/DICT so far
-        misc.sprint(output_dir, path)
         # handle case where file is missing
         if not os.path.exists(pjoin(output_dir,path)):
             misc.sprint("include file not existing", pjoin(output_dir,path))
@@ -3259,7 +3285,7 @@ def edit_dummy_fct_from_file(self, filelist, outdir):
             text = open(path,'r').read()
             #misc.sprint(text)
             f77_type = ['real*8', 'integer', 'double precision', 'logical']
-            pattern = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \
+            pattern = re.compile(r'^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \
                                  % {'type':'|'.join(f77_type)}, re.I+re.M)
             for fct in pattern.findall(text):
                 fsock = file_writers.FortranWriter(tmp,'w')
@@ -3287,6 +3313,7 @@ def edit_dummy_fct_from_file(self, filelist, outdir):
                 starttext = open(pjoin(outdir, path+'.orig')).read()
                 fsock.remove_routine(starttext, to_mod[path][0])
                 for text in to_mod[path][1]:
+                    text = self.retro_compatible_custom_fct(text)
                     fsock.writelines(text)
                 fsock.close()
                 if not filecmp.cmp(pjoin(outdir, path), pjoin(outdir, path+'.tmp')):
@@ -3303,7 +3330,33 @@ def edit_dummy_fct_from_file(self, filelist, outdir):
                     files.mv(pjoin(outdir,path+'.orig'), pjoin(outdir, path))
 
+    @staticmethod
+    def retro_compatible_custom_fct(lines, mode=None):
+        f77_type = ['real*8', 'integer', 'double precision', 'logical']
+        function_pat = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \
+                                  % {'type':'|'.join(f77_type)}, re.I+re.M)
+        include_pat = re.compile(r"\s+include\s+[\'\"]([\w\./]*)")
+
+        assert isinstance(lines, list)
+        sol = []
+
+        if mode is None or 'vector.inc' in mode:
+            search = True
+            for i,line in enumerate(lines[:]):
+                if search and re.search(include_pat, line):
+                    name = re.findall(include_pat, line)[0]
+                    misc.sprint('DETECTED INCLUDE', name)
+                    if 'vector.inc' in name:
+                        search = False
+                    if 'run.inc' in name:
+                        sol.append(" include 'vector.inc'")
+                        search = False
+                sol.append(line)
+                if re.search(function_pat, line):
+                    misc.sprint("DETECTED FCT")
+                    search = True
+        return sol
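A stand-alone rendering of what this retro-compatibility pass does to an old-style custom Fortran function (simplified: the real method above also re-arms the search at each routine header via function_pat):

    import re

    include_pat = re.compile(r"\s+include\s+['\"]([\w\./]*)")

    def add_vector_inc(lines):
        out, search = [], True
        for line in lines:
            m = include_pat.match(line)
            if search and m:
                if 'vector.inc' in m.group(1):
                    search = False
                elif 'run.inc' in m.group(1):
                    out.append("      include 'vector.inc'")
                    search = False
            out.append(line)
        return out

    old = ["      double precision function custom_scale(P)",
           "      include 'run.inc'",
           "      end"]
    print(add_vector_inc(old))
    # ["      double precision function custom_scale(P)",
    #  "      include 'vector.inc'", "      include 'run.inc'", "      end"]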
     def guess_entry_fromname(self, name, value):
         """
@@ -3346,7 +3399,7 @@ def update_typelist(value, name, opts):
         #handle metadata
         opts = {}
         forced_opts = []
-        for key,val in re.findall("\<(?P<name>[_\-\w]+)\=(?P<value>[^>]*)\>", str(name)):
+        for key,val in re.findall(r"\<(?P<name>[_\-\w]+)\=(?P<value>[^>]*)\>", str(name)):
             forced_opts.append(key)
             if val in ['True', 'False']:
                 opts[key] = eval(val)
@@ -3681,11 +3734,22 @@ def write_autodef(self, output_dir, output_file=None):
             out = ["%s\n" %l for l in out]
             fsock.writelines(out)
 
-    @staticmethod
-    def get_idbmup(lpp):
+    def get_idbmup(self, lpp, beam=1):
         """return the particle colliding pdg code"""
         if lpp in (1,2, -1,-2):
-            return math.copysign(2212, lpp)
+            target = 2212
+            if 'nb_proton1' in self:
+                nbp = self['nb_proton%s' % beam]
+                nbn = self['nb_neutron%s' % beam]
+                if nbp == 1 and nbn ==0:
+                    target = 2212
+                elif nbp==0 and nbn ==1:
+                    target = 2112
+                else:
+                    target = 1000000000
+                    target += 10 * (nbp+nbn)
+                    target += 10000 * nbp
+            return math.copysign(target, lpp)
         elif lpp in (3,-3):
             return math.copysign(11, lpp)
         elif lpp in (4,-4):
@@ -3701,8 +3765,8 @@ def get_banner_init_information(self):
            the first line of the <init> block of the lhe file."""
 
         output = {}
-        output["idbmup1"] = self.get_idbmup(self['lpp1'])
-        output["idbmup2"] = self.get_idbmup(self['lpp2'])
+        output["idbmup1"] = self.get_idbmup(self['lpp1'], beam=1)
+        output["idbmup2"] = self.get_idbmup(self['lpp2'], beam=2)
         output["ebmup1"] = self["ebeam1"]
         output["ebmup2"] = self["ebeam2"]
         output["pdfgup1"] = 0
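Worked example of the heavy-ion beam code built in get_idbmup above, following the PDG 10LZZZAAAI ion numbering (beam composition chosen for illustration):

    import math

    # lpp=1 beam with one proton and one neutron (A=2, Z=1):
    nbp, nbn, lpp = 1, 1, 1
    target = 1000000000 + 10 * (nbp + nbn) + 10000 * nbp
    print(int(math.copysign(target, lpp)))   # 1000010020, i.e. a deuteron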
@@ -3959,7 +4023,8 @@ def check_validity(self, card):
             dict.__setitem__(card, 'pdlabel1', card['pdlabel'])
             dict.__setitem__(card, 'pdlabel2', card['pdlabel'])
 
-        if abs(card['lpp1']) == 1 == abs(card['lpp2']) and card['pdlabel1'] != card['pdlabel2']:
+        if isinstance(card['lpp1'],int) and isinstance(card['lpp2'],int) and \
+           abs(card['lpp1']) == 1 == abs(card['lpp2']) and card['pdlabel1'] != card['pdlabel2']:
             raise InvalidRunCard("Assymetric beam pdf not supported for proton-proton collision")
 
     def status(self, card):
@@ -4156,12 +4221,16 @@ def default_setup(self):
         self.add_param('frame_id', 6, system=True)
         self.add_param("event_norm", "average", allowed=['sum','average', 'unity'],
                        include=False, sys_default='sum', hidden=True)
+        self.add_param("keep_log", "normal", include=False, hidden=True,
+                       comment="none: all log send to /dev/null.\n minimal: keep only log for survey of the last run.\n normal: keep only log for survey of all run.\n debug: keep all log (survey and refine)",
+                       allowed=['none', 'minimal', 'normal', 'debug'])
         #cut
         self.add_param("auto_ptj_mjj", True, hidden=True)
         self.add_param("bwcutoff", 15.0)
         self.add_param("cut_decays", False, cut='d')
         self.add_param('dsqrt_shat',0., cut=True)
         self.add_param("nhel", 0, include=False)
+        self.add_param("limhel", 1e-8, hidden=True, comment="threshold to determine if an helicity contributes when not MC over helicity.")
         #pt cut
         self.add_param("ptj", 20.0, cut='j')
         self.add_param("ptb", 0.0, cut='b')
@@ -4842,7 +4911,7 @@ def create_default_for_process(self, proc_characteristic, history, proc_def):
         # here pick strategy 2 if only one QCD color flow
         # and for pure multi-jet case
         jet_id = [21] + list(range(1, self['maxjetflavor']+1))
-        if proc_characteristic['single_color']:
+        if proc_characteristic['gauge'] != 'FD' and proc_characteristic['single_color']:
             self['sde_strategy'] = 2
             #for pure lepton final state go back to sde_strategy=1
             pure_lepton=True
@@ -5741,9 +5810,10 @@ def check_validity(self):
 
         # check that ebeam is bigger than the proton mass.
         for i in [1,2]:
-            if self['lpp%s' % i ] not in [1,2]:
+            # do not for proton mass if not proton PDF (or when scan initialization)
+            if self['lpp%s' % i ] not in [1,2] or isinstance(self['ebeam%i' % i], str):
                 continue
-
+
             if self['ebeam%i' % i] < 0.938:
                 if self['ebeam%i' %i] == 0:
                     logger.warning("At-rest proton mode set: energy beam set to 0.938 GeV")
@@ -6193,3 +6263,181 @@ def log_and_update(self, banner, card, par, v):
         xcard = banner.charge_card(card)
         xcard[par[0]].param_dict[(par[1],)].value = v
         xcard.write(os.path.join(self.me_dir, 'Cards', '%s.dat' % card))
+
+
+
+
+class RunCardIterator(object):
+    """A class keeping track of the scan: flag in the param_card and
+       having an __iter__() function to scan over all the points of the scan.
+    """
+
+    logging = True
+    def __init__(self, input_path=None):
+        with misc.TMP_variable(RunCard, 'allow_scan', True):
+            self.run_card = RunCard(input_path, consistency=False)
+        self.run_card.allow_scan = True
+
+        self.itertag = [] #all the current value use
+        self.cross = []   # keep track of all the cross-section computed
+        self.param_order = []
+
+    def __iter__(self):
+        """generate the next param_card (in a abstract way) related to the scan.
+           Technically this generates only the generator."""
+
+        if hasattr(self, 'iterator'):
+            return self.iterator
+        self.iterator = self.iterate()
+        return self.iterator
+
+    def write(self, path):
+        self.__iter__.write(path)
+
+    def next(self, autostart=False):
+        """call the next iteration value"""
+        try:
+            iterator = self.iterator
+        except:
+            if autostart:
+                iterator = self.__iter__()
+            else:
+                raise
+        try:
+            out = next(iterator)
+        except StopIteration:
+            del self.iterator
+            raise
+        return out
+
+    def iterate(self):
+        """create the actual generator"""
+        all_iterators = {} # dictionary of key -> block of object to scan [([param, [values]), ...]
+        pattern = re.compile(r'''scan\s*(?P<id>\d*)\s*:\s*(?P<value>[^#]*)''', re.I)
+
+        # fill all_iterators with the run_card information
+        for name in self.run_card.scan_set:
+            value = self.run_card[name]
+            try:
+                key, def_list = pattern.findall(value)[0]
+            except Exception as error:
+                misc.sprint(error)
+                raise Exception("Fail to handle scanning tag in run_card: Please check that the syntax is valid")
+            if key == '':
+                key = -1 * len(all_iterators)
+            if key not in all_iterators:
+                all_iterators[key] = []
+            try:
+                all_iterators[key].append( (name, eval(def_list)))
+            except SyntaxError as error:
+                raise Exception("Fail to handle your scan definition. Please check your syntax:\n entry: %s \n Error reported: %s" %(def_list, error))
+
+        #prepare to keep track of parameter changing for the report
+        keys = list(all_iterators.keys()) # need to fix an order for the scan
+        #store the type of parameter
+        for key in keys:
+            for param, values in all_iterators[key]:
+                self.param_order.append("run_card#%s" % (param))
+
+        # do the loop
+        lengths = [list(range(len(all_iterators[key][0][1]))) for key in keys]
+        from functools import reduce
+        total = reduce((lambda x, y: x * y),[len(x) for x in lengths])
+        for i,positions in enumerate(itertools.product(*lengths)):
+            self.itertag = []
+            if self.logging:
+                logger.info("Create the next run_card in the scan definition (%s/%s) " %( i+1, total), '$MG:BOLD')
+            for i, pos in enumerate(positions):
+                key = keys[i]
+                for param, values in all_iterators[key]:
+                    # assign the value in the card.
+                    self.run_card[param] = values[pos]
+                    self.itertag.append(values[pos])
+                    if self.logging:
+                        logger.info("change parameter %s to %s", \
+                                    param, values[pos])
+
+            # retrun the current param_card up to next iteration
+            yield self.run_card
+
+    def store_entry(self, run_name, cross, error=None, run_card_path=None):
+        """store the value of the cross-section"""
+
+        if isinstance(cross, dict):
+            info = dict(cross)
+            info.update({'bench' : self.itertag, 'run_name': run_name})
+            self.cross.append(info)
+        else:
+            if error is None:
+                self.cross.append({'bench' : self.itertag, 'run_name': run_name, 'cross(pb)':cross})
+            else:
+                self.cross.append({'bench' : self.itertag, 'run_name': run_name, 'cross(pb)':cross, 'error(pb)':error})
+
+    def write_summary(self, path, order=None, lastline=False, nbcol=20):
+        """ """
+
+        if path:
+            ff = open(path, 'w')
+            path_events = path.rsplit("/", 1)[0]
+            #identCard = open(pjoin(path.rsplit("/", 2)[0], "Cards", "ident_card.dat"))
+            #identLines = identCard.readlines()
+            #identCard.close()
+        else:
+            ff = StringIO.StringIO()
+        if order:
+            keys = order
+        else:
+            keys = list(self.cross[0].keys())
+            if 'bench' in keys: keys.remove('bench')
+            if 'run_name' in keys: keys.remove('run_name')
+            keys.sort()
+            if 'cross(pb)' in keys:
+                keys.remove('cross(pb)')
+                keys.append('cross(pb)')
+            if 'error(pb)' in keys:
+                keys.remove('error(pb)')
+                keys.append('error(pb)')
+
+        formatting = "#%s%s%s\n" %('%%-%is ' % (nbcol-1), ('%%-%is ' % (nbcol))* len(self.param_order),
+                                   ('%%-%is ' % (nbcol))* len(keys))
+        # header
+        if not lastline:
+            ff.write(formatting % tuple(['run_name'] + self.param_order + keys))
+        formatting = "%s%s%s\n" %('%%-%is ' % (nbcol), ('%%-%ie ' % (nbcol))* len(self.param_order),
+                                  ('%%-%ie ' % (nbcol))* len(keys))
+
+        if not lastline:
+            to_print = self.cross
+        else:
+            to_print = self.cross[-1:]
+        for info in to_print:
+            name = info['run_name']
+            bench = info['bench']
+            data = []
+            for k in keys:
+                if k in info:
+                    data.append(info[k])
+                else:
+                    data.append(0.)
+            ff.write(formatting % tuple([name] + bench + data))
+            ff_single = open(pjoin(path_events, name, "params.dat"), "w")
+            for i_bench in range(0, len(bench)):
+                ff_single.write(self.param_order[i_bench] + " = " + str(bench[i_bench]) +"\n")
+            ff_single.close()
+
+        if not path:
+            return ff.getvalue()
+
+    def get_next_name(self, run_name):
+        """returns a smart name for the next run"""
+
+        if '_' in run_name:
+            name, value = run_name.rsplit('_',1)
+            if value.isdigit():
+                return '%s_%02i' % (name, float(value)+1)
+        # no valid '_' in the name
+        return '%s_scan_02' % run_name
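The scan tag parsed by the pattern in iterate() above, in action (the group names 'id' and 'value' are assumptions for the reconstructed regex):

    import re

    pattern = re.compile(r'''scan\s*(?P<id>\d*)\s*:\s*(?P<value>[^#]*)''', re.I)

    key, values = pattern.findall('scan1: [10., 20., 30.]  # ptj')[0]
    print(key, eval(values))   # '1' [10.0, 20.0, 30.0]

Entries sharing the same index (several 'scan1:' parameters, say) are varied in lockstep, while unnumbered 'scan:' entries each get their own axis, so the total number of generated run_cards is the product of the axis lengths; each point is written out as run_XX together with a params.dat summary.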
diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/check_param_card.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/check_param_card.py
index 71089d7480..bc785b5de6 100755
--- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/check_param_card.py
+++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/check_param_card.py
@@ -649,7 +649,7 @@ def write_inc_file(self, outpath, identpath, default, need_mp=False):
         #check if we need to write the value of scale for some block
         if os.path.exists(input_inc):
             text = open(input_inc).read()
-            scales = list(set(re.findall('mdl__(\w*)__scale', text, re.I)))
+            scales = list(set(re.findall(r'mdl__(\w*)__scale', text, re.I)))
         else:
             scales = []
@@ -1000,10 +1000,12 @@ def iterate(self):
             self.param_order.append("%s#%s" % (param.lhablock, '_'.join(repr(i) for i in param.lhacode)))
         # do the loop
         lengths = [list(range(len(all_iterators[key][0][1]))) for key in keys]
-        for positions in itertools.product(*lengths):
+        from functools import reduce
+        total = reduce((lambda x, y: x * y),[len(x) for x in lengths])
+        for i,positions in enumerate(itertools.product(*lengths)):
             self.itertag = []
             if self.logging:
-                logger.info("Create the next param_card in the scan definition", '$MG:BOLD')
+                logger.info("Create the next param_card in the scan definition (%s/%s)" % (i+1,total), '$MG:BOLD')
             for i, pos in enumerate(positions):
                 key = keys[i]
                 for param, values in all_iterators[key]:
diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/cluster.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/cluster.py
index 9a893f630d..1ad860e04f 100755
--- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/cluster.py
+++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/cluster.py
@@ -646,7 +646,10 @@ def worker(self):
                 if os.path.exists(exe) and not exe.startswith('/'):
                     exe = './' + exe
                 if isinstance(opt['stdout'],str):
-                    opt['stdout'] = open(opt['stdout'],'w')
+                    if opt['stdout'] == '/dev/null':
+                        opt['stdout'] = os.open(os.devnull, os.O_RDWR)
+                    else:
+                        opt['stdout'] = open(opt['stdout'],'w')
                 if opt['stderr'] == None:
                     opt['stderr'] = subprocess.STDOUT
                 if arg:
@@ -671,11 +674,12 @@ def worker(self):
                     self.pids.put(pid)
                 # the function should return 0 if everything is fine
                 # the error message otherwise
-                returncode = exe(*arg, **opt)
-                if returncode != 0:
-                    logger.warning("fct %s does not return 0. Stopping the code in a clean way. The error was:\n%s", exe, returncode)
+                try:
+                    returncode = exe(*arg, **opt)
+                except Exception as error:
+                    #logger.warning("fct %s does not return 0. Stopping the code in a clean way. The error was:\n%s", exe, returncode)
                     self.stoprequest.set()
-                    self.remove("fct %s does not return 0:\n %s" % (exe, returncode))
+                    self.remove("fct %s does raise %s\n %s" % (exe, error))
             except Exception as error:
                 self.fail_msg = sys.exc_info()
                 logger.warning(str(error))
@@ -700,7 +704,7 @@ def worker(self):
 
     def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None,
-               log=None, required_output=[], nb_submit=0):
+               log=None, required_output=[], nb_submit=0, python_opts={}):
         """submit a job on multicore machine"""
 
         # open threads if needed
@@ -720,7 +724,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None,
             return tag
         else:
             # python function
-            self.queue.put((tag, prog, argument, {}))
+            self.queue.put((tag, prog, argument, python_opts))
             self.submitted.put(1)
             return tag
@@ -908,6 +912,10 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None
         else:
             requirement = ''
 
+        if 'cluster_walltime' in self.options and self.options['cluster_walltime']\
+           and self.options['cluster_walltime'] != 'None':
+            requirement+='\n MaxRuntime = %s' % self.options['cluster_walltime']
+
         if cwd is None:
             cwd = os.getcwd()
         if stdout is None:
@@ -936,7 +944,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None
         #Submitting job(s).
         #Logging submit event(s).
         #1 job(s) submitted to cluster 2253622.
-        pat = re.compile("submitted to cluster (\d*)",re.MULTILINE)
+        pat = re.compile(r"submitted to cluster (\d*)",re.MULTILINE)
         output = output.decode(errors='ignore')
         try:
             id = pat.search(output).groups()[0]
@@ -1025,7 +1033,7 @@ def submit2(self, prog, argument=[], cwd=None, stdout=None, stderr=None,
         #Logging submit event(s).
         #1 job(s) submitted to cluster 2253622.
         output = output.decode(errors='ignore')
-        pat = re.compile("submitted to cluster (\d*)",re.MULTILINE)
+        pat = re.compile(r"submitted to cluster (\d*)",re.MULTILINE)
         try:
             id = pat.search(output).groups()[0]
         except:
@@ -1588,7 +1596,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None
 
         output = a.communicate()[0].decode(errors='ignore')
         #Your job 874511 ("test.sh") has been submitted
-        pat = re.compile("Your job (\d*) \(",re.MULTILINE)
+        pat = re.compile(r"Your job (\d*) \(",re.MULTILINE)
         try:
             id = pat.search(output).groups()[0]
         except:
@@ -1606,7 +1614,7 @@ def control_one_job(self, id):
         if not status:
             return 'F'
         #874516 0.00000 test.sh alwall qw 03/04/2012 22:30:35 1
-        pat = re.compile("^(\d+)\s+[\d\.]+\s+[\w\d\.]+\s+[\w\d\.]+\s+(\w+)\s")
+        pat = re.compile(r"^(\d+)\s+[\d\.]+\s+[\w\d\.]+\s+[\w\d\.]+\s+(\w+)\s")
         stat = ''
         for line in status.stdout.read().decode(errors='ignore').split('\n'):
             if not line:
@@ -1636,7 +1644,7 @@ def control(self, me_dir=None):
             cmd = 'qstat -s %s' % statusflag
             status = misc.Popen([cmd], shell=True, stdout=subprocess.PIPE)
             #874516 0.00000 test.sh alwall qw 03/04/2012 22:30:35 1
-            pat = re.compile("^(\d+)")
+            pat = re.compile(r"^(\d+)")
             for line in status.stdout.read().decode(errors='ignore').split('\n'):
                 line = line.strip()
                 try:
@@ -1715,6 +1723,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None
             stderr = stdout
         if log is None:
             log = '/dev/null'
+
         command = ['sbatch', '-o', stdout,
                    '-J', me_dir,
@@ -1726,6 +1735,12 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None
             command.insert(1, '-p')
             command.insert(2, self.cluster_queue)
 
+        if 'cluster_walltime' in self.options and self.options['cluster_walltime']\
+           and self.options['cluster_walltime'] != 'None':
+            command.insert(1, '-t')
+            command.insert(2, self.options['cluster_walltime'])
+
         a = misc.Popen(command, stdout=subprocess.PIPE,
                        stderr=subprocess.STDOUT,
@@ -1736,7 +1751,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None
         id = output_arr[3].rstrip()
 
         if not id.isdigit():
-            id = re.findall('Submitted batch job ([\d\.]+)', ' '.join(output_arr))
+            id = re.findall(r'Submitted batch job ([\d\.]+)', ' '.join(output_arr))
             if not id or len(id)>1:
                 raise ClusterManagmentError(
                     'fail to submit to the cluster: \n%s' \
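How the new cluster_walltime option reaches the two schedulers, condensed into a stand-alone sketch (the option value is illustrative):

    options = {'cluster_walltime': '2:00:00'}

    # HTCondor: appended to the submission requirements
    requirement = ''
    if options.get('cluster_walltime') and options['cluster_walltime'] != 'None':
        requirement += '\n MaxRuntime = %s' % options['cluster_walltime']

    # SLURM: injected as 'sbatch -t <walltime>'
    command = ['sbatch', '-o', '/dev/null', '-J', 'run']
    if options.get('cluster_walltime') and options['cluster_walltime'] != 'None':
        command.insert(1, '-t')
        command.insert(2, options['cluster_walltime'])
    print(requirement, command)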
") + logger.info(" while short is a simple multi-column format (nice for plotting)") + logger.info(" --path=") + logger.info(" allow to write the information to a file.") + logger.info(" --mode=w|a #default is w ") + logger.info(" when the information is printed to a file, you can choose ") + logger.info(" to either overwrite the file if already exists (w mode)") + logger.info(" to append the information at the end of the file (a mode)") class CheckValidForCmd(object): @@ -727,7 +745,7 @@ def __init__(self, me_dir, options, *args, **opts): if not self.proc_characteristics['ninitial']: # Get number of initial states nexternal = open(pjoin(self.me_dir,'Source','nexternal.inc')).read() - found = re.search("PARAMETER\s*\(NINCOMING=(\d)\)", nexternal) + found = re.search(r"PARAMETER\s*\(NINCOMING=(\d)\)", nexternal) self.ninitial = int(found.group(1)) else: self.ninitial = self.proc_characteristics['ninitial'] @@ -989,7 +1007,22 @@ def do_treatcards(self, line, amcatnlo=False): #raise Exception, "%s %s %s" % (sys.path, os.path.exists(pjoin(self.me_dir,'bin','internal', 'ufomodel')), os.listdir(pjoin(self.me_dir,'bin','internal', 'ufomodel'))) import ufomodel as ufomodel zero = ufomodel.parameters.ZERO - if self.proc_characteristics['nlo_mixed_expansion']: + no_width = [] + + if self.proc_characteristics['ew_sudakov']: + # if the sudakov approximation is used, force all particle widths to zero + # unless the complex mass scheme is used + if not self.proc_characteristics['complex_mass_scheme']: + no_width = [p for p in ufomodel.all_particles if p.width != zero] + logger.info('''Setting all particle widths to zero (needed for EW Sudakov approximation).''','$MG:BOLD') + # also, check that the model features the 'ntadpole' parameter, and set it to 1 + try: + param_card['tadpole'].get(1).value = 1. + logger.info('''Setting the value of ntadpole to 1 (needed for EW Sudakov approximation).''','$MG:BOLD') + except KeyError: + logger.warning('''The model has no 'ntadpole' parameter. 
The Sudakov approximation for EW corrections may give wrong results.''') + + elif self.proc_characteristics['nlo_mixed_expansion']: no_width = [p for p in ufomodel.all_particles if (str(p.pdg_code) in pids or str(-p.pdg_code) in pids) and p.width != zero] @@ -1168,17 +1201,17 @@ def detect_card_type(path): 'Begin Minpts', 'gridpack', 'ebeam1', - 'block\s+mw_run', + r'block\s+mw_run', 'BLOCK', 'DECAY', 'launch', 'madspin', - 'transfer_card\.dat', + r'transfer_card\.dat', 'set', 'main:numberofevents', # pythia8, '@MG5aMC skip_analysis', #MA5 --both-- - '@MG5aMC\s*inputs\s*=\s*\*\.(?:hepmc|lhe)', #MA5 --both-- - '@MG5aMC\s*reconstruction_name', # MA5 hadronique + r'@MG5aMC\s*inputs\s*=\s*\*\.(?:hepmc|lhe)', #MA5 --both-- + r'@MG5aMC\s*reconstruction_name', # MA5 hadronique '@MG5aMC', # MA5 hadronique 'run_rivet_later', # Rivet ] @@ -1237,7 +1270,7 @@ def detect_card_type(path): return 'madspin_card.dat' if 'decay' in text: # need to check if this a line like "decay w+" or "set decay" - if re.search("(^|;)\s*decay", fulltext, re.M): + if re.search(r"(^|;)\s*decay", fulltext, re.M): return 'madspin_card.dat' else: return 'reweight_card.dat' @@ -2074,6 +2107,12 @@ def check_multicore(self): # ensure that the run_card is present if not hasattr(self, 'run_card'): self.run_card = banner_mod.RunCard(pjoin(self.me_dir, 'Cards', 'run_card.dat')) + # Below reads the run_card in the LHE file rather than the Cards/run_card + import madgraph.various.lhe_parser as lhe_parser + args_path = list(args) + self.check_decay_events(args_path) + self.run_card = banner_mod.RunCard(lhe_parser.EventFile(args_path[0]).get_banner()['mgruncard']) + # we want to run this in a separate shell to avoid hard f2py crash command = [sys.executable] @@ -2085,6 +2124,12 @@ def check_multicore(self): command.append('--web') command.append('reweight') + ## TV: copy the event file as backup before starting reweighting + event_path = pjoin(self.me_dir, 'Events', self.run_name, 'events.lhe.gz') + event_path_backup = pjoin(self.me_dir, 'Events', self.run_name, 'events_orig.lhe.gz') + if os.path.exists(event_path) and not os.path.exists(event_path_backup): + shutil.copyfile(event_path, event_path_backup) + ######### START SINGLE CORE MODE ############ if self.options['nb_core']==1 or self.run_card['nevents'] < 101 or not check_multicore(self): if self.run_name: @@ -2200,7 +2245,7 @@ def check_multicore(self): cross_sections[key] = value / (nb_event+1) lhe.remove() for key in cross_sections: - if key == 'orig' or key.isdigit(): + if key == 'orig' or (key.isdigit() and not (key[0] == '2')): continue logger.info('%s : %s pb' % (key, cross_sections[key])) return @@ -2461,7 +2506,7 @@ def do_add_time_of_flight(self, line): ############################################################################ def do_print_results(self, line): - """Not in help:Print the cross-section/ number of events for a given run""" + """Print the cross-section/ number of events for a given run""" args = self.split_arg(line) options={'path':None, 'mode':'w', 'format':'full'} @@ -2942,10 +2987,15 @@ def do_rivet(self, line, postprocess=False): #2 Prepare Rivet setup environments rivet_path = self.options['rivet_path'] yoda_path = self.options['yoda_path'] + fastjet_path = subprocess.Popen([self.options['fastjet'], '--prefix'], + stdout = subprocess.PIPE).stdout.read().decode(errors='ignore').strip() + set_env = set_env + "export PATH={0}:$PATH\n".format(pjoin(rivet_path, 'bin')) set_env = set_env + "export PATH={0}:$PATH\n".format(pjoin(yoda_path, 'bin')) set_env = 
set_env + "export LD_LIBRARY_PATH={0}:{1}:$LD_LIBRARY_PATH\n".format(pjoin(rivet_path, 'lib'), pjoin(rivet_path, 'lib64')) set_env = set_env + "export LD_LIBRARY_PATH={0}:{1}:$LD_LIBRARY_PATH\n".format(pjoin(yoda_path, 'lib'), pjoin(yoda_path, 'lib64')) + set_env = set_env + "export LD_LIBRARY_PATH={0}:$LD_LIBRARY_PATH\n".format(pjoin(self.options['hepmc_path'], 'lib')) + set_env = set_env + "export LD_LIBRARY_PATH={0}:$LD_LIBRARY_PATH\n".format(pjoin(fastjet_path, 'lib')) major, minor = sys.version_info[0:2] set_env = set_env + "export PYTHONPATH={0}:{1}:$PYTHONPATH\n".format(pjoin(rivet_path, 'lib', 'python%s.%s' %(major,minor), 'site-packages'),\ pjoin(rivet_path, 'lib64', 'python%s.%s' %(major,minor), 'site-packages')) @@ -4311,8 +4361,8 @@ def complete_compute_widths(self, text, line, begidx, endidx, formatting=True): else: completion = {} completion['options'] = self.list_completion(text, - ['--path=', '--output=', '--min_br=0.\$', '--nlo', - '--precision_channel=0.\$', '--body_decay=']) + ['--path=', '--output=', r'--min_br=0.\$', '--nlo', + r'--precision_channel=0.\$', '--body_decay=']) return self.deal_multiple_categories(completion, formatting) @@ -4821,9 +4871,9 @@ class AskforEditCard(cmd.OneLinePathCompletion): (return False to repeat the question) """ - all_card_name = ['param_card', 'run_card', 'pythia_card', 'pythia8_card', + all_card_name = ['param_card', 'run_card', 'pythia_card', 'pythia8_card', 'fo_analysis_card' 'madweight_card', 'MadLoopParams', 'shower_card', 'rivet_card'] - to_init_card = ['param', 'run', 'madweight', 'madloop', + to_init_card = ['param', 'run', 'madweight', 'madloop', 'fo_analysis', 'shower', 'pythia8','delphes','madspin', 'rivet'] special_shortcut = {} special_shortcut_help = {} @@ -4853,6 +4903,7 @@ def load_default(self): self.has_PY8 = False self.has_delphes = False self.has_rivet = False + self.has_fo_card = False self.paths = {} self.update_block = [] @@ -5058,7 +5109,8 @@ def init_run(self, cards): try: - self.run_card = banner_mod.RunCard(self.paths['run'], consistency='warning') + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + self.run_card = banner_mod.RunCard(self.paths['run'], consistency='warning') except IOError: self.run_card = {} try: @@ -5248,6 +5300,15 @@ def init_delphes(self, cards): self.has_delphes = True return [] + def init_fo_analysis(self, cards): + self.has_fo_card = False + if not self.get_path('FO_analyse', cards): + return [] + self.has_fo_card = True + self.fo_card = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse']) + self.fo_card_def = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse_default']) + return list(self.fo_card.string_vars) + def set_CM_velocity(self, line): """compute sqrts from the velocity in the center of mass frame""" @@ -5508,6 +5569,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed['delphes_card'] = '' if self.has_rivet: allowed['rivet_card'] = '' + if self.has_fo_card: + allowed['fo_card'] = '' elif len(args) == 2: if args[1] == 'run_card': @@ -5532,6 +5595,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed = {'delphes_card':'default'} elif args[1] == 'rivet_card': allowed = {'rivet_card':'default'} + elif args[1] == 'fo_card': + allowed = {'fo_card':'default'} else: allowed = {'value':''} @@ -5539,6 +5604,7 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): start = 1 if args[1] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', 
'MadLoop_card','pythia8_card','delphes_card','plot_card', + 'fo_card', 'madanalysis5_parton_card','madanalysis5_hadron_card', 'rivet_card']: start = 2 @@ -5576,6 +5642,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): categories.append('delphes_card') if self.has_rivet: categories.append('rivet_card') + if self.has_fo_card: + categories.append('fo_card') possibilities['category of parameter (optional)'] = \ self.list_completion(text, categories) @@ -5630,7 +5698,13 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): if 'delphes_card' in allowed: if allowed['delphes_card'] == 'default': opts = ['default', 'atlas', 'cms'] - possibilities['Delphes Card'] = self.list_completion(text, opts) + possibilities['Delphes Card'] = self.list_completion(text, opts) + + if 'fo_card' in allowed: + opts = self.fo_card.string_vars + if allowed['fo_card'] == 'default': + opts.append('default') + possibilities['FO Card'] = self.list_completion(text, opts) if 'value' in list(allowed.keys()): opts = ['default', 'scale'] @@ -5733,21 +5807,28 @@ def do_set(self, line): if args[0] in self.special_shortcut: targettypes , cmd = self.special_shortcut[args[0]] if len(args) != len(targettypes) +1: - logger.warning('shortcut %s requires %s argument' % (args[0], len(targettypes))) - if len(args) < len(targettypes) +1: - return + if len(targettypes) == 1 and args[len(targettypes)].startswith('scan'): + args = args[:len(targettypes)] + [' '.join(args[len(targettypes):])] + targettypes = [str] else: - logger.warning('additional argument will be ignored') + logger.warning('shortcut %s requires %s argument' % (args[0], len(targettypes))) + if len(args) < len(targettypes) +1: + return + else: + logger.warning('additional argument will be ignored') values ={} for i, argtype in enumerate(targettypes): try: - values = {str(i): banner_mod.ConfigFile.format_variable(args[i+1], argtype, args[0])} + values[str(i)] = banner_mod.ConfigFile.format_variable(args[i+1], argtype, args[0]) except ValueError as e: logger.warning("Wrong argument: The entry #%s should be of type %s.", i+1, argtype) return except InvalidCmd as e: - logger.warning(str(e)) - return + if isinstance(args[i+1], str) and args[i+1].startswith('scan'): + values[str(i)] = args[i+1] + else: + logger.warning(str(e)) + return #else: # logger.warning("too many argument for this command") # return @@ -5787,7 +5868,7 @@ def do_set(self, line): if os.path.exists(pythia_path): logger.info('add line QCUT = %s in pythia_card.dat' % args[1]) p_card = open(pythia_path,'r').read() - p_card, n = re.subn('''^\s*QCUT\s*=\s*[\de\+\-\.]*\s*$''', + p_card, n = re.subn(r'''^\s*QCUT\s*=\s*[\de\+\-\.]*\s*$''', ''' QCUT = %s ''' % args[1], \ p_card, flags=(re.M+re.I)) if n==0: @@ -5801,7 +5882,7 @@ def do_set(self, line): if os.path.exists(pythia_path): logger.info('add line SHOWERKT = %s in pythia_card.dat' % args[1].upper()) p_card = open(pythia_path,'r').read() - p_card, n = re.subn('''^\s*SHOWERKT\s*=\s*[default\de\+\-\.]*\s*$''', + p_card, n = re.subn(r'''^\s*SHOWERKT\s*=\s*[default\de\+\-\.]*\s*$''', ''' SHOWERKT = %s ''' % args[1].upper(), \ p_card, flags=(re.M+re.I)) if n==0: @@ -5856,7 +5937,7 @@ def do_set(self, line): pjoin(self.me_dir,'Cards', 'delphes_card.dat')) return - if args[0] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', + if args[0] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', 'fo_card', 'delphes_card','madanalysis5_hadron_card','madanalysis5_parton_card','rivet_card']: if args[1] == 'default': 
@@ -5787,7 +5868,7 @@ def do_set(self, line):
             if os.path.exists(pythia_path):
                 logger.info('add line QCUT = %s in pythia_card.dat' % args[1])
                 p_card = open(pythia_path,'r').read()
-                p_card, n = re.subn('''^\s*QCUT\s*=\s*[\de\+\-\.]*\s*$''',
+                p_card, n = re.subn(r'''^\s*QCUT\s*=\s*[\de\+\-\.]*\s*$''',
                                     ''' QCUT = %s ''' % args[1], \
                                     p_card, flags=(re.M+re.I))
                 if n==0:
@@ -5801,7 +5882,7 @@ def do_set(self, line):
             if os.path.exists(pythia_path):
                 logger.info('add line SHOWERKT = %s in pythia_card.dat' % args[1].upper())
                 p_card = open(pythia_path,'r').read()
-                p_card, n = re.subn('''^\s*SHOWERKT\s*=\s*[default\de\+\-\.]*\s*$''',
+                p_card, n = re.subn(r'''^\s*SHOWERKT\s*=\s*[default\de\+\-\.]*\s*$''',
                                     ''' SHOWERKT = %s ''' % args[1].upper(), \
                                     p_card, flags=(re.M+re.I))
                 if n==0:
@@ -5856,7 +5937,7 @@ def do_set(self, line):
                           pjoin(self.me_dir,'Cards', 'delphes_card.dat'))
             return
 
-        if args[0] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card',
+        if args[0] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', 'fo_card',
                        'delphes_card','madanalysis5_hadron_card','madanalysis5_parton_card','rivet_card']:
 
             if args[1] == 'default':
@@ -6176,6 +6257,22 @@ def do_set(self, line):
                 self.setRivet(args[start], value, default=default)
             self.rivet_card.write(self.paths['rivet'], self.paths['rivet_default'])
 
+        elif self.has_fo_card and (card in ['', 'fo_card'])\
+             and args[start].lower() in [k.lower() for k in self.fo_card.string_vars]:
+
+            if args[start] in self.conflict and card == '':
+                text = 'ambiguous name (present in more than one card). Please specify which card to edit'
+                logger.warning(text)
+                return
+            if args[start+1] == 'default':
+                value = self.fo_card_default[args[start]]
+                default = True
+            else:
+                value = args[start+1]
+                default = False
+            self.fo_card[args[start]] = value
+            self.modified_card.add('fo_card')
+
         #INVALID --------------------------------------------------------------
         else:
             logger.warning('invalid set command %s ' % line)
@@ -6222,12 +6319,13 @@ def setM(self, block, name, value):
 
     def setR(self, name, value):
-        if self.mother_interface.inputfile:
-            self.run_card.set(name, value, user=True, raiseerror=True)
-        else:
-            self.run_card.set(name, value, user=True)
-        new_value = self.run_card.get(name)
-        logger.info('modify parameter %s of the run_card.dat to %s' % (name, new_value),'$MG:BOLD')
+        with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True):
+            if self.mother_interface.inputfile:
+                self.run_card.set(name, value, user=True, raiseerror=True)
+            else:
+                self.run_card.set(name, value, user=True)
+            new_value = self.run_card.get(name)
+            logger.info('modify parameter %s of the run_card.dat to %s' % (name, new_value),'$MG:BOLD')
 
     def setML(self, name, value, default=False):
@@ -6314,6 +6412,7 @@ def check_card_consistency(self):
 
         proc_charac = self.mother_interface.proc_characteristics
         if proc_charac['grouped_matrix'] and \
+           isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int) and \
           abs(self.run_card['lpp1']) == 1 == abs(self.run_card['lpp2']) and \
           (self.run_card['nb_proton1'] != self.run_card['nb_proton2'] or
            self.run_card['nb_neutron1'] != self.run_card['nb_neutron2'] or
@@ -6403,41 +6502,42 @@ def check_card_consistency(self):
 
         # check that only quark/gluon/photon are in initial beam if lpp=+-1
         pdg_in_p = list(range(-6,7))+[21,22]
-        if (abs(self.run_card['lpp1'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial1'])) \
+        if isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int):
+            if(abs(self.run_card['lpp1'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial1'])) \
               or (abs(self.run_card['lpp2'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial2'])):
-            if 'pythia_card.dat' in self.cards or 'pythia8_card.dat' in self.cards:
-                path_to_remove = None
-                if 'pythia_card.dat' in self.cards:
-                    path_to_remove = self.paths['pythia']
-                    card_to_remove = 'pythia_card.dat'
-                elif 'pythia8_card.dat' in self.cards:
-                    path_to_remove = self.paths['pythia8']
-                    card_to_remove = 'pythia8_card.dat'
-                if path_to_remove:
-                    if 'partonshower' in self.run_card['bypass_check']:
+                if 'pythia_card.dat' in self.cards or 'pythia8_card.dat' in self.cards:
+                    path_to_remove = None
+                    if 'pythia_card.dat' in self.cards:
+                        path_to_remove = self.paths['pythia']
+                        card_to_remove = 'pythia_card.dat'
+                    elif 'pythia8_card.dat' in self.cards:
+                        path_to_remove = self.paths['pythia8']
+                        card_to_remove = 'pythia8_card.dat'
+                    if path_to_remove:
+                        if 'partonshower' in self.run_card['bypass_check']:
+                            logger.warning("forcing to keep parton-shower run while possibly not fully consistent... please be carefull")
+                        else:
+                            logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.')
+                            os.remove(path_to_remove)
+                            self.cards.remove(card_to_remove)
+                    else:
+                        logger.info('Remember that Parton-Shower are not yet ready for such proton component definition (HW implementation in progress).', '$MG:BOLD' )
+            elif (abs(self.run_card['lpp1'])==3 and abs(self.run_card['lpp2'])==3):
+                if 'pythia8_card.dat' in self.cards:
+                    if self.run_card['pdlabel'] == 'isronlyll':
+                        if 'partonshower' not in self.run_card['bypass_check']:
+                            # force that QED shower is on?
+                            for param in ['TimeShower:QEDshowerByQ', 'TimeShower:QEDshowerByL', 'TimeShower:QEDshowerByGamma', 'SpaceShower:QEDshowerByQ', 'SpaceShower:QEDshowerByL']:
+                                if param not in self.PY8Card or \
+                                   (not self.PY8Card[param] and param.lower() not in self.PY8Card.user_set):
+                                    logger.warning('Activating QED shower: setting %s to True', param)
+                                    self.PY8Card[param] = True
+                    elif 'partonshower' in self.run_card['bypass_check']:
                         logger.warning("forcing to keep parton-shower run while possibly not fully consistent... please be carefull")
-                    else:
+                    else:
                         logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.')
-                    os.remove(path_to_remove)
-                    self.cards.remove(card_to_remove)
-                else:
-                    logger.info('Remember that Parton-Shower are not yet ready for such proton component definition (HW implementation in progress).', '$MG:BOLD' )
-        elif (abs(self.run_card['lpp1'])==3 and abs(self.run_card['lpp2'])==3):
-            if 'pythia8_card.dat' in self.cards:
-                if self.run_card['pdlabel'] == 'isronlyll':
-                    if 'partonshower' not in self.run_card['bypass_check']:
-                        # force that QED shower is on?
-                        for param in ['TimeShower:QEDshowerByQ', 'TimeShower:QEDshowerByL', 'TimeShower:QEDshowerByGamma', 'SpaceShower:QEDshowerByQ', 'SpaceShower:QEDshowerByL']:
-                            if param not in self.PY8Card or \
-                               (not self.PY8Card[param] and param.lower() not in self.PY8Card.user_set):
-                                logger.warning('Activating QED shower: setting %s to True', param)
-                                self.PY8Card[param] = True
-                    elif 'partonshower' in self.run_card['bypass_check']:
-                        logger.warning("forcing to keep parton-shower run while possibly not fully consistent... please be carefull")
-                    else:
-                        logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.')
-                        os.remove(self.paths['pythia8'])
-                        self.cards.remove('pythia8_card.dat')
+                        os.remove(self.paths['pythia8'])
+                        self.cards.remove('pythia8_card.dat')
 
     ########################################################################
@@ -6514,7 +6614,8 @@ def check_card_consistency(self):
 
         #check relation between lepton PDF // dressed lepton collisions // ...
-        if abs(self.run_card['lpp1']) != 1 or abs(self.run_card['lpp2']) != 1:
+        if isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int) and \
+           abs(self.run_card['lpp1']) != 1 or abs(self.run_card['lpp2']) != 1:
             if abs(self.run_card['lpp1']) == abs(self.run_card['lpp2']) == 3:
                 # this can be dressed lepton or photon-flux
                 if proc_charac['pdg_initial1'] in [[11],[-11]] and proc_charac['pdg_initial2'] in [[11],[-11]]:
@@ -6732,7 +6833,11 @@ def write_card_param(self):
         """ write the param_card """
 
         self.param_card.write(self.paths['param'])
-
+
+    def write_card_fo_card(self):
+        """ write the fo_card"""
+        self.fo_card.write_card_from_template(self.paths['FO_analyse'], self.paths['FO_analyse_default'])
+
     @staticmethod
     def update_dependent(mecmd, me_dir, param_card, path ,timer=0, run_card=None,
                          lhapdfconfig=None):
@@ -7076,7 +7181,7 @@ def do_decay(self, line):
         #first find the particle
         particle = line.split('>')[0].strip()
         logger.info("change madspin_card to define the decay of %s: %s" %(particle, line.strip()), '$MG:BOLD')
-        particle = particle.replace('+','\+').replace('-','\-')
+        particle = particle.replace('+',r'\+').replace('-',r'\-')
         decay_pattern = re.compile(r"^\s*decay\s+%s\s*>[\s\w+-~]*?$" % particle, re.I+re.M)
         text= open(path).read()
         text = decay_pattern.sub('', text)
@@ -7193,7 +7298,7 @@ def help_edit(self, prefix=True):
         logger.info( '    --clean remove all previously existing line in  the file')
         logger.info( '    --comment_line="" comment all lines matching the regular expression')
         logger.info('')
-        logger.info('    Note: all regular-expression will be prefixed by ^\s*')
+        logger.info(r'    Note: all regular-expression will be prefixed by ^\s*')
         logger.info('')
         logger.info( '    example: edit reweight --after_line="change mode\b" change model heft')
         logger.info( '             edit madspin --after_line="banner" change model XXXX')
@@ -7314,7 +7419,7 @@ def do_add(self, line):
             text = open(path).read()
             split = text.split('\n')
             search_pattern=r'''replace_line=(?P<quote>["'])(?:(?=(\\?))\2.)*?\1'''
-            pattern = '^\s*' + re.search(search_pattern, line).group()[14:-1]
+            pattern = r'^\s*' + re.search(search_pattern, line).group()[14:-1]
             for posline,l in enumerate(split):
                 if re.search(pattern, l):
                     break
@@ -7344,7 +7449,7 @@ def do_add(self, line):
             text = open(path).read()
             split = text.split('\n')
             search_pattern=r'''comment_line=(?P<quote>["'])(?:(?=(\\?))\2.)*?\1'''
-            pattern = '^\s*' + re.search(search_pattern, line).group()[14:-1]
+            pattern = r'^\s*' + re.search(search_pattern, line).group()[14:-1]
             nb_mod = 0
             for posline,l in enumerate(split):
                 if re.search(pattern, l):
@@ -7366,7 +7471,7 @@ def do_add(self, line):
             text = open(path).read()
             split = text.split('\n')
             search_pattern=r'''before_line=(?P<quote>["'])(?:(?=(\\?))\2.)*?\1'''
-            pattern = '^\s*' + re.search(search_pattern, line).group()[13:-1]
+            pattern = r'^\s*' + re.search(search_pattern, line).group()[13:-1]
             for posline,l in enumerate(split):
                 if re.search(pattern, l):
                     break
@@ -7383,7 +7488,7 @@ def do_add(self, line):
             text = open(path).read()
             split = text.split('\n')
             search_pattern = r'''after_line=(?P<quote>["'])(?:(?=(\\?))\2.)*?\1'''
-            pattern = '^\s*' + re.search(search_pattern, line).group()[12:-1]
+            pattern = r'^\s*' + re.search(search_pattern, line).group()[12:-1]
             for posline,l in enumerate(split):
                 if re.search(pattern, l):
                     break
@@ -7527,16 +7632,19 @@ def open_file(self, answer):
                 answer = 'plot'
             else:
                 answer = self.cards[int(answer)-self.integer_bias]
-
+        path = ''
         if 'madweight' in answer:
             answer = answer.replace('madweight', 'MadWeight')
         elif 'MadLoopParams' in answer:
             answer = self.paths['ML']
         elif 'pythia8_card' in answer:
             answer = self.paths['pythia8']
+        elif 'FO_analyse' in answer:
+            path = self.paths['FO_analyse']
+            answer = 'fo_card'
         if os.path.exists(answer):
             path = answer
-        else:
+        elif not os.path.exists(path):
             if not '.dat' in answer and not '.lhco' in answer:
                 if answer != 'trigger':
                     path = self.paths[answer]
@@ -7595,7 +7703,8 @@ def reload_card(self, path):
             logger.error('Please re-open the file and fix the problem.')
             logger.warning('using the \'set\' command without opening the file will discard all your manual change')
         elif path == self.paths['run']:
-            self.run_card = banner_mod.RunCard(path)
+            with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True):
+                self.run_card = banner_mod.RunCard(path)
         elif path == self.paths['shower']:
             self.shower_card = shower_card_mod.ShowerCard(path)
         elif path == self.paths['ML']:
@@ -7614,6 +7723,8 @@ def reload_card(self, path):
             except:
                 import internal.madweight.Cards as mwcards
                 self.mw_card = mwcards.Card(path)
+        elif path == self.paths['FO_analyse']:
+            self.fo_card = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse'])
         else:
             logger.debug('not keep in sync: %s', path)
         return path
@@ -7629,6 +7740,9 @@ def scanparamcardhandling(input_path=lambda obj: pjoin(obj.me_dir, 'Cards', 'param_card.dat'),
                           iteratorclass=param_card_mod.ParamCardIterator,
                           summaryorder=lambda obj: lambda:None,
                           check_card=lambda obj: CommonRunCmd.static_check_param_card,
+                          run_card_scan=False,
+                          run_card_input= lambda obj: pjoin(obj.me_dir, 'Cards', 'run_card.dat'),
+                          run_card_iteratorclass=banner_mod.RunCardIterator,
                           ):
     """ This is a decorator for customizing/using scan over the param_card (or technically other)
        This should be use like this:
@@ -7678,7 +7792,60 @@ def __enter__(self):
         def __exit__(self, ctype, value, traceback ):
             self.iterator.write(self.path)
 
-    def decorator(original_fct):
+    def scan_over_run_card(original_fct, obj, *args, **opts):
+
+        if isinstance(input_path, str):
+            card_path = run_card_input
+        else:
+            card_path = run_card_input(obj)
+
+        run_card_iterator = run_card_iteratorclass(card_path)
+        orig_card = copy.deepcopy(run_card_iterator.run_card)
+        if not run_card_iterator.run_card.scan_set:
+            return original_fct(obj, *args, **opts)
+
+        with restore_iterator(orig_card, card_path):
+            # this with statement ensure that the original card is restore
+            # whatever happens inside those block
+
+            if not hasattr(obj, 'allow_notification_center'):
+                obj.allow_notification_center = False
+            with misc.TMP_variable(obj, 'allow_notification_center', False):
+                orig_name = get_run_name(obj)
+                if not orig_name and args[1]:
+                    orig_name = args[1][0]
+                    args = (args[0], args[1][1:])
+                    #orig_name = "scan_%s" % len(obj.results)
+
+                try:
+                    os.mkdir(pjoin(obj.me_dir, 'Events', orig_name))
+                except Exception:
+                    pass
+                next_name = orig_name + "_00"
+
+                for i,card in enumerate(run_card_iterator):
+                    card.write(card_path)
+                    # still have to check for the auto-wdith
+                    #if i !=0:
+                    next_name = run_card_iterator.get_next_name(next_name)
+                    set_run_name(obj)(next_name)
+                    try:
+                        original_fct(obj, *args, **opts)
+                    except ignoreerror as error:
+                        run_card_iterator.store_entry(next_name, {'exception': error})
+                    else:
+                        run_card_iterator.store_entry(next_name, store_for_scan(obj)(), run_card_path=card_path)
+
+                #param_card_iterator.write(card_path) #-> this is done by the with statement
+                name = misc.get_scan_name(orig_name, next_name)
+                path = result_path(obj) % name
+                logger.info("write scan results in %s" % path ,'$MG:BOLD')
+                order = summaryorder(obj)()
+                run_card_iterator.write_summary(path, order=order)
+
+    def decorator(original_fct):
         def new_fct(obj, *args, **opts):
 
             if isinstance(input_path, str):
@@ -7702,8 +7869,13 @@ def new_fct(obj, *args, **opts):
 
             if not param_card_iterator:
                 #first run of the function
-                original_fct(obj, *args, **opts)
-                return
+                if run_card_scan:
+                    scan_over_run_card(original_fct, obj, *args, **opts)
+                    return
+                else:
+                    #first run of the function
+                    original_fct(obj, *args, **opts)
+                    return
 
             with restore_iterator(param_card_iterator, card_path):
                 # this with statement ensure that the original card is restore
diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/extended_cmd.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/extended_cmd.py
index 2f37070580..789976beee 100755
--- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/extended_cmd.py
+++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/extended_cmd.py
@@ -624,12 +624,12 @@ def complete(self, text, state):
                     compfunc = self.completenames
 
                 # correct wrong splittion with '\ '
-                if line and begidx > 2 and line[begidx-2:begidx] == '\ ':
+                if line and begidx > 2 and line[begidx-2:begidx] == r'\ ':
                     Ntext = line.split(os.path.sep)[-1]
-                    self.completion_prefix = Ntext.rsplit('\ ', 1)[0] + '\ '
+                    self.completion_prefix = Ntext.rsplit(r'\ ', 1)[0] + r'\ '
                     to_rm = len(self.completion_prefix) - 1
                     Nbegidx = len(line.rsplit(os.path.sep, 1)[0]) + 1
-                    data = compfunc(Ntext.replace('\ ', ' '), line, Nbegidx, endidx)
+                    data = compfunc(Ntext.replace(r'\ ', ' '), line, Nbegidx, endidx)
                     self.completion_matches = [p[to_rm:] for p in data if len(p)>to_rm]
 
                 # correct wrong splitting with '-'/"="
@@ -742,7 +742,7 @@ def path_completion(text, base_dir = None, only_dirs = False,
             completion += [prefix + f for f in ['.'+os.path.sep, '..'+os.path.sep] if \
                       f.startswith(text) and not prefix.startswith('.')]
 
-        completion = [a.replace(' ','\ ') for a in completion]
+        completion = [a.replace(' ',r'\ ') for a in completion]
         return completion
@@ -1253,7 +1253,7 @@ def check_answer_in_input_file(self, question_instance, default, path=False, line=None):
                 return possibility[0]
         if '=' in line and ' ' in line.strip():
             leninit = len(line)
-            line,n = re.subn('\s*=\s*','=', line)
+            line,n = re.subn(r'\s*=\s*','=', line)
             if n and len(line) != leninit:
                 return self.check_answer_in_input_file(question_instance, default, path=path, line=line)
@@ -1311,7 +1311,7 @@ def nice_error_handling(self, error, line):
             if os.path.exists(self.debug_output):
                 os.remove(self.debug_output)
             try:
-                super(Cmd,self).onecmd('history %s' % self.debug_output.replace(' ', '\ '))
+                super(Cmd,self).onecmd('history %s' % self.debug_output.replace(' ', r'\ '))
             except Exception as error:
                 logger.error(error)
@@ -2001,6 +2001,7 @@ def write_configuration(self, filepath, basefile, basedir, to_keep):
         text = ""
         has_mg5_path = False
         # Use local configuration => Need to update the path
+        already_written = set()
         for line in open(basefile):
             if '=' in line:
                 data, value = line.split('=',1)
@@ -2018,9 +2019,12 @@ def write_configuration(self, filepath, basefile, basedir, to_keep):
                 comment = ''
             if key in to_keep:
                 value = str(to_keep[key])
-            else:
+            elif line not in already_written:
+                already_written.add(line)
                 text += line
                 continue
+            else:
+                continue
             if key == 'mg5_path':
                 has_mg5_path = True
             try:
@@ -2032,14 +2036,20 @@ def write_configuration(self, filepath, basefile, basedir, to_keep):
                 # check if absolute path
                 if not os.path.isabs(value):
                     value = os.path.realpath(os.path.join(basedir, value))
-            text += '%s = %s # %s \n' % (key, value, comment)
+            new_line = '%s = %s # %s \n' % (key, value, comment)
+            if new_line not in already_written:
+                text += new_line
+                already_written.add(new_line)
         for key in to_write:
             if key in to_keep:
-                text += '%s = %s \n' % (key, to_keep[key])
+                new_line = '%s = %s \n' % (key, to_keep[key])
+                if new_line not in already_written:
+                    text += new_line
 
         if not MADEVENT and not has_mg5_path:
-            text += """\n# MG5 MAIN DIRECTORY\n"""
-            text += "mg5_path = %s\n" % MG5DIR
+            if "mg5_path = %s\n" % MG5DIR not in already_written:
+                text += """\n# MG5 MAIN DIRECTORY\n"""
+                text += "mg5_path = %s\n" % MG5DIR
 
         writer = open(filepath,'w')
         writer.write(text)
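The already_written guard above deduplicates repeated configuration lines coming from the template; reduced to its essence (a minimal sketch):

    already_written = set()
    text = ''
    for line in ['a = 1\n', 'a = 1\n', 'b = 2\n']:
        if line not in already_written:
            already_written.add(line)
            text += line
    print(text)   # 'a = 1\nb = 2\n'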
@@ -2190,7 +2200,7 @@ def onecmd(self, line, **opt):
             raise
 
     def reask(self, reprint_opt=True):
-        pat = re.compile('\[(\d*)s to answer\]')
+        pat = re.compile(r'\[(\d*)s to answer\]')
         prev_timer = signal.alarm(0) # avoid timer if any
         if prev_timer:
@@ -2991,7 +3001,7 @@ def question_formatting(self, nb_col = 80,
                             lpotential_switch=0,
                             lnb_key=0, key=None):
-        """should return four lines:
+        r"""should return four lines:
         1. The upper band (typically /========\
         2. The lower band (typically \========/
         3. The line without conflict | %(nb)2d. %(descrip)-20s %(name)5s = %(switch)-10s |
@@ -3239,13 +3249,13 @@ def create_question(self, help_text=True):
                     data_to_format['conflict_switch'] = self.color_for_value(key,self.inconsistent_keys[key], consistency=False)
 
                 if hidden_line:
-                    f2 = re.sub('%(\((?:name|descrip|add_info)\)-?)(\d+)s',
+                    f2 = re.sub(r'%(\((?:name|descrip|add_info)\)-?)(\d+)s',
                                 lambda x: '%%%s%ds' % (x.group(1),int(x.group(2))+9), f2)
                 text.append(f2 % data_to_format)
             elif hidden_line:
                 if not f3:
-                    f3 = re.sub('%(\((?:name|descrip|add_info)\)-?)(\d+)s',
+                    f3 = re.sub(r'%(\((?:name|descrip|add_info)\)-?)(\d+)s',
                                 lambda x: '%%%s%ds' % (x.group(1),int(x.group(2))+9), f1)
                 text.append(f3 % data_to_format)
diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/file_writers.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/file_writers.py
index 41bff05276..526756129f 100755
--- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/file_writers.py
+++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/file_writers.py
@@ -36,10 +36,10 @@ class FileWriter(io.FileIO):
 
     supported_preprocessor_commands = ['if']
     preprocessor_command_re=re.compile(
-           "\s*(?P<command>%s)\s*\(\s*(?P<body>.*)\s*\)\s*{\s*"\
+           r"\s*(?P<command>%s)\s*\(\s*(?P<body>.*)\s*\)\s*{\s*"\
                            %('|'.join(supported_preprocessor_commands)))
     preprocessor_endif_re=re.compile(\
-           "\s*}\s*(?P<else>else)?\s*(\((?P<body>.*)\))?\s*(?P<new_block>{)?\s*")
+           r"\s*}\s*(?P<else>else)?\s*(\((?P<body>.*)\))?\s*(?P<new_block>{)?\s*")
 
     class FileWriterError(IOError):
         """Exception raised if an error occurs in the definition
@@ -191,15 +191,15 @@ class FortranWriterError(FileWriter.FileWriterError):
         pass
 
     # Parameters defining the output of the Fortran writer
-    keyword_pairs = {'^if.+then\s*$': ('^endif', 2),
-                     '^type(?!\s*\()\s*.+\s*$': ('^endtype', 2),
-                     '^do(?!\s+\d+)\s+': ('^enddo\s*$', 2),
-                     '^subroutine': ('^end\s*$', 0),
-                     '^module': ('^end\s*$', 0),
-                     'function': ('^end\s*$', 0)}
-    single_indents = {'^else\s*$':-2,
-                      '^else\s*if.+then\s*$':-2}
-    number_re = re.compile('^(?P<num>\d+)\s+(?P<rest>.*)')
+    keyword_pairs = {r'^if.+then\s*$': ('^endif', 2),
+                     r'^type(?!\s*\()\s*.+\s*$': ('^endtype', 2),
+                     r'^do(?!\s+\d+)\s+': (r'^enddo\s*$', 2),
+                     '^subroutine': (r'^end\s*$', 0),
+                     '^module': (r'^end\s*$', 0),
+                     'function': (r'^end\s*$', 0)}
+    single_indents = {r'^else\s*$':-2,
+                      r'^else\s*if.+then\s*$':-2}
+    number_re = re.compile(r'^(?P<num>\d+)\s+(?P<rest>.*)')
     line_cont_char = '$'
     comment_char = 'c'
     uniformcase = True #force everyting to be lower/upper case
@@ -212,7 +212,7 @@ class FortranWriterError(FileWriter.FileWriterError):
     # Private variables
     __indent = 0
     __keyword_list = []
-    __comment_pattern = re.compile(r"^(\s*#|c$|(c\s+([^=]|$))|cf2py|c\-\-|c\*\*|!)", re.IGNORECASE)
+    __comment_pattern = re.compile(r"^(\s*#|c\$|c$|(c\s+([^=]|$))|cf2py|c\-\-|c\*\*|\s*!|!\$)", re.IGNORECASE)
     __continuation_line = re.compile(r"(?:  )[$&]")
 
     def write_line(self, line):
@@ -424,26 +424,20 @@ def count_number_of_quotes(self, line):
                 i = i + 1
         return len(splitline)-1
 
-    def remove_routine(self, text, fct_names, formatting=True):
-        """write the incoming text but fully removing the associate routine/function
-           text can be a path to a file, an iterator, a string
-           fct_names should be a list of functions to remove
+    @staticmethod
+    def get_routine(text, fct_names, call_back=None):
+        """
+        get the fortran function from a fortran file
         """
 
-        f77_type = ['real*8', 'integer', 'double precision', 'logical']
-        pattern = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \
+        pattern = re.compile(r'^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \
                              % {'type':'|'.join(f77_type)}, re.I)
-
+
+        if isinstance(text, str):
+            text = text.split('\n')
+
+        to_write=False
         removed = []
-        if isinstance(text, str):
-            if '\n' in text:
-                text = text.split('\n')
-            else:
-                text = open(text)
-        if isinstance(fct_names, str):
-            fct_names = [fct_names]
-
-        to_write=True
         for line in text:
             fct = pattern.findall(line)
             if fct:
@@ -451,22 +445,38 @@ def remove_routine(self, text, fct_names, formatting=True):
                     to_write = False
                 else:
                     to_write = True
-            if to_write:
-                if formatting:
-                    if line.endswith('\n'):
-                        line = line[:-1]
-                    self.writelines(line)
-                else:
-                    if not line.endswith('\n'):
-                        line = '%s\n' % line
-                    super(FileWriter,self).writelines(line)
+            if call_back:
+                call_back(line)
             else:
                 removed.append(line)
-
+        return removed
+
+    def remove_routine(self, text, fct_names, formatting=True):
+        """write the incoming text but fully removing the associate routine/function
+           text can be a path to a file, an iterator, a string
+           fct_names should be a list of functions to remove
+        """
+
+        def call_back(line):
+            if formatting:
+                if line.endswith('\n'):
+                    line = line[:-1]
+                self.writelines(line)
+            else:
+                if not line.endswith('\n'):
+                    line = '%s\n' % line
+                super(FileWriter,self).writelines(line)
+
+        return self.get_routine(text, fct_names, call_back)
+
+class FortranWriter90(FortranWriter):
+
+    comment_char = ' !'
+ #=============================================================================== # CPPWriter #=============================================================================== @@ -497,50 +507,50 @@ class CPPWriterError(FileWriter.FileWriterError): '^private': standard_indent, '^protected': standard_indent} - spacing_patterns = [ - ('\s*\"\s*}', '\"'), - ('\s*,\s*', ', '), - ('\s*-\s*', ' - '), - ('([{(,=])\s*-\s*', '\g<1> -'), - ('(return)\s*-\s*', '\g<1> -'), - ('\s*\+\s*', ' + '), - ('([{(,=])\s*\+\s*', '\g<1> +'), - ('\(\s*', '('), - ('\s*\)', ')'), - ('\{\s*', '{'), - ('\s*\}', '}'), - ('\s*=\s*', ' = '), - ('\s*>\s*', ' > '), - ('\s*<\s*', ' < '), - ('\s*!\s*', ' !'), - ('\s*/\s*', '/'), - ('(?\s+>\s*', ' >> '), - ('<\s*double\s*>>\s*', ' > '), - ('\s*<\s+<\s*', ' << '), - ('\s*-\s+>\s*', '->'), - ('\s*=\s+=\s*', ' == '), - ('\s*!\s+=\s*', ' != '), - ('\s*>\s+=\s*', ' >= '), - ('\s*<\s+=\s*', ' <= '), - ('\s*&&\s*', ' && '), - ('\s*\|\|\s*', ' || '), - ('\s*{\s*}', ' {}'), - ('\s*;\s*', '; '), - (';\s*\}', ';}'), - (';\s*$}', ';'), - ('\s*<\s*([a-zA-Z0-9]+?)\s*>', '<\g<1>>'), - ('^#include\s*<\s*(.*?)\s*>', '#include <\g<1>>'), - ('(\d+\.{0,1}\d*|\.\d+)\s*[eE]\s*([+-]{0,1})\s*(\d+)', - '\g<1>e\g<2>\g<3>'), - ('\s+',' '), - ('^\s*#','#')] + spacing_patterns = [(r'\s*\"\s*}', '\"'), + (r'\s*,\s*', ', '), + (r'\s*-\s*', ' - '), + (r'([{(,=])\s*-\s*', r'\g<1> -'), + (r'(return)\s*-\s*', r'\g<1> -'), + (r'\s*\+\s*', ' + '), + (r'([{(,=])\s*\+\s*', r'\g<1> +'), + (r'\(\s*', '('), + (r'\s*\)', ')'), + (r'\{\s*', '{'), + (r'\s*\}', '}'), + (r'\s*=\s*', ' = '), + (r'\s*>\s*', ' > '), + (r'\s*<\s*', ' < '), + (r'\s*!\s*', ' !'), + (r'\s*/\s*', '/'), + (r'\s*\*\s*', ' * '), + (r'\s*-\s+-\s*', '-- '), + (r'\s*\+\s+\+\s*', '++ '), + (r'\s*-\s+=\s*', ' -= '), + (r'\s*\+\s+=\s*', ' += '), + (r'\s*\*\s+=\s*', ' *= '), + (r'\s*/=\s*', ' /= '), + (r'\s*>\s+>\s*', ' >> '), + (r'<\s*double\s*>>\s*', ' > '), + (r'\s*<\s+<\s*', ' << '), + (r'\s*-\s+>\s*', '->'), + (r'\s*=\s+=\s*', ' == '), + (r'\s*!\s+=\s*', ' != '), + (r'\s*>\s+=\s*', ' >= '), + (r'\s*<\s+=\s*', ' <= '), + (r'\s*&&\s*', ' && '), + (r'\s*\|\|\s*', ' || '), + (r'\s*{\s*}', ' {}'), + (r'\s*;\s*', '; '), + (r';\s*\}', ';}'), + (r';\s*$}', ';'), + (r'\s*<\s*([a-zA-Z0-9]+?)\s*>', r'<\g<1>>'), + (r'^#include\s*<\s*(.*?)\s*>', r'#include <\g<1>>'), + (r'(\d+\.{0,1}\d*|\.\d+)\s*[eE]\s*([+-]{0,1})\s*(\d+)', + r'\g<1>e\g<2>\g<3>'), + (r'\s+',' '), + (r'^\s*#','#')] + spacing_re = dict([(key[0], re.compile(key[0])) for key in \ spacing_patterns]) diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/gen_crossxhtml.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/gen_crossxhtml.py index d58ec573bc..681bf9d09b 100755 --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/gen_crossxhtml.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/gen_crossxhtml.py @@ -648,7 +648,7 @@ def recreate(self, banner): if run_card['ickkw'] != 0: #parse the file to have back the information pythia_log = misc.BackRead(pjoin(path, '%s_pythia.log' % tag)) - pythiare = re.compile("\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") + pythiare = re.compile(r"\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") for line in pythia_log: info = pythiare.search(line) if not info: @@ -1623,7 +1623,7 @@ def get_html(self, runresults): elif self.debug: text = str(self.debug).replace('. ','.
<br>') if 'http' in text: - pat = re.compile('(http[\S]*)') + pat = re.compile(r'(http[\S]*)') text = pat.sub(r'<a href=\1> here </a>', text) debug = '<br> %s <br> %s <br>
' % \ (self.debug.__class__.__name__, text) diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/gen_ximprove.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/gen_ximprove.py index c86da36a05..415ecc9de0 100755 --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/gen_ximprove.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/gen_ximprove.py @@ -158,8 +158,11 @@ def get_helicity(self, to_submit=True, clean=True): (stdout, _) = p.communicate(''.encode()) stdout = stdout.decode('ascii',errors='ignore') if stdout: - nb_channel = max([math.floor(float(d)) for d in stdout.split()]) + lines = stdout.strip().split('\n') + nb_channel = max([math.floor(float(d)) for d in lines[-1].split()]) else: + if os.path.exists(pjoin(self.me_dir, 'error')): + os.remove(pjoin(self.me_dir, 'error')) for matrix_file in misc.glob('matrix*orig.f', Pdir): files.cp(matrix_file, matrix_file.replace('orig','optim')) P_zero_result.append(Pdir) @@ -297,12 +300,14 @@ def get_helicity(self, to_submit=True, clean=True): bad_amps_perhel = [] if __debug__: mtext = open(matrix_file).read() - nb_amp = int(re.findall('PARAMETER \(NGRAPHS=(\d+)\)', mtext)[0]) - logger.debug('nb_hel: %s zero amp: %s bad_amps_hel: %s/%s', len(good_hels),len(bad_amps),len(bad_amps_perhel), len(good_hels)*nb_amp ) + nb_amp = int(re.findall(r'PARAMETER \(NGRAPHS=(\d+)\)', mtext)[0]) + logger.debug('(%s) nb_hel: %s zero amp: %s bad_amps_hel: %s/%s', split_file[-1], len(good_hels),len(bad_amps),len(bad_amps_perhel), len(good_hels)*nb_amp ) if len(good_hels) == 1: files.cp(matrix_file, matrix_file.replace('orig','optim')) continue # avoid optimization if onlye one helicity - recycler = hel_recycle.HelicityRecycler(good_hels, bad_amps, bad_amps_perhel) + + gauge = self.cmd.proc_characteristics['gauge'] + recycler = hel_recycle.HelicityRecycler(good_hels, bad_amps, bad_amps_perhel, gauge=gauge) # In case of bugs you can play around with these: recycler.hel_filt = self.run_card['hel_filtering'] recycler.amp_splt = self.run_card['hel_splitamp'] @@ -1299,7 +1304,7 @@ def get_job_for_event(self): 'script_name': 'unknown', 'directory': C.name, # need to be change for splitted job 'P_dir': C.parent_name, - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # needed for RO gridpack 'offset': 1, # need to be change for splitted job 'nevents': nevents, 'maxiter': self.max_iter, @@ -1387,7 +1392,7 @@ def create_ajob(self, template, jobs, write_dir=None): break info = jobs[j] info['script_name'] = 'ajob%i' % script_number - info['keeplog'] = 'false' + info['keeplog'] = 'false' if self.run_card['keep_log'] != 'debug' else 'true' if "base_directory" not in info: info["base_directory"] = "./" fsock.write(template_text % info) @@ -1456,7 +1461,7 @@ def get_job_for_precision(self): 'script_name': 'unknown', 'directory': C.name, # need to be change for splitted job 'P_dir': C.parent_name, - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # used for RO gridpack 'offset': 1, # need to be change for splitted job 'nevents': nevents, 'maxiter': self.max_iter, @@ -1916,7 +1921,7 @@ def get_job_for_event(self): 'directory': C.name, # need to be change for splitted job 'P_dir': os.path.basename(C.parent_name), 'offset': 1, # need to be change for splitted job - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # use for RO gridpack 'nevents': 
nevents, #int(nevents*self.gen_events_security)+1, 'maxiter': self.max_iter, 'miniter': self.min_iter, diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/hel_recycle.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/hel_recycle.py index dfa45d5d20..6c86611f68 100755 --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/hel_recycle.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/hel_recycle.py @@ -383,7 +383,7 @@ def get_number(cls, *args): class HelicityRecycler(): '''Class for recycling helicity''' - def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[]): + def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[], gauge='U'): External.good_hel = [] External.nhel_lines = '' @@ -427,6 +427,7 @@ def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[]): self.all_hel = [] self.hel_filt = True + self.gauge = gauge def set_input(self, file): if 'born_matrix' in file: @@ -612,7 +613,7 @@ def unfold_helicities(self, line, nature): def apply_amps(self, line, new_objs): if self.amp_splt: - return split_amps(line, new_objs) + return split_amps(line, new_objs, gauge=self.gauge) else: return apply_args(line, [i.args for i in new_objs]) @@ -785,7 +786,7 @@ def apply_args(old_line, all_the_args): return ''.join(new_lines) -def split_amps(line, new_amps): +def split_amps(line, new_amps, gauge): if not new_amps: return '' fct = line.split('(',1)[0].split('_0')[0] @@ -841,34 +842,31 @@ def split_amps(line, new_amps): spin = fct.split(None,1)[1][to_remove] lines.append('%sP1N_%s(%s)' % (fct, to_remove+1, ', '.join(args))) - hel, iamp = re.findall('AMP\((\d+),(\d+)\)', amp_result)[0] + hel, iamp = re.findall(r'AMP\((\d+),(\d+)\)', amp_result)[0] hel_calculated.append(hel) #lines.append(' %(result)s = TMP(3) * W(3,%(w)s) + TMP(4) * W(4,%(w)s)+' # % {'result': amp_result, 'w': windex}) #lines.append(' & TMP(5) * W(5,%(w)s)+TMP(6) * W(6,%(w)s)' # % {'result': amp_result, 'w': windex}) - if spin in "VF": - lines.append(""" call CombineAmp(%(nb)i, - & (/%(hel_list)s/), - & (/%(w_list)s/), - & TMP, W, AMP(1,%(iamp)s))""" % - {'nb': len(sub_amps), - 'hel_list': ','.join(hel_calculated), - 'w_list': ','.join(windices), - 'iamp': iamp - }) + if spin == "F" or ( spin == "V" and gauge !='FD'): + suffix = '' elif spin == "S": - lines.append(""" call CombineAmpS(%(nb)i, - &(/%(hel_list)s/), - & (/%(w_list)s/), - & TMP, W, AMP(1,%(iamp)s))""" % - {'nb': len(sub_amps), - 'hel_list': ','.join(hel_calculated), - 'w_list': ','.join(windices), - 'iamp': iamp - }) + suffix = 'S' + elif spin == "V" and gauge == "FD": + suffix = "FD" else: - raise Exception("split amp are not supported for spin2 and 3/2") + raise Exception("split amp not supported for spin2, 3/2") + + lines.append(""" call CombineAmp%(suffix)s(%(nb)i, + & (/%(hel_list)s/), + & (/%(w_list)s/), + & TMP, W, AMP(1,%(iamp)s))""" % {'suffix':suffix, + 'nb': len(sub_amps), + 'hel_list': ','.join(hel_calculated), + 'w_list': ','.join(windices), + 'iamp': iamp + }) + #lines.append('') return '\n'.join(lines) diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/histograms.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/histograms.py index 98f22c4c3a..9931127f66 100755 --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/histograms.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/histograms.py @@ -632,34 +632,34 @@ class HwU(Histogram): # than necessary because the HwU standard allows for spaces from within # the name of a weight weight_header_re = re.compile( - '&\s*(?P(\S|(\s(?!\s*(&|$))))+)(\s(?!(&|$)))*') + 
r'&\s*(?P(\S|(\s(?!\s*(&|$))))+)(\s(?!(&|$)))*') # ================================ # Histo weight specification RE's # ================================ # The start of a plot - histo_start_re = re.compile('^\s*\s*(?P\d+)\s*"\s*'+ - '(?P(\S|(\s(?!\s*")))+)\s*"\s*$') + histo_start_re = re.compile(r'^\s*\s*(?P\d+)\s*"\s*'+ + r'(?P(\S|(\s(?!\s*")))+)\s*"\s*$') # A given weight specifier - a_float_re = '[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' - histo_bin_weight_re = re.compile('(?P%s|NaN)'%a_float_re,re.IGNORECASE) - a_int_re = '[\+|-]?\d+' + a_float_re = r'[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' + histo_bin_weight_re = re.compile(r'(?P%s|NaN)'%a_float_re,re.IGNORECASE) + a_int_re = r'[\+|-]?\d+' # The end of a plot histo_end_re = re.compile(r'^\s*<\\histogram>\s*$') # A scale type of weight - weight_label_scale = re.compile('^\s*mur\s*=\s*(?P%s)'%a_float_re+\ - '\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) - weight_label_PDF = re.compile('^\s*PDF\s*=\s*(?P\d+)\s*$') - weight_label_PDF_XML = re.compile('^\s*pdfset\s*=\s*(?P\d+)\s*$') - weight_label_TMS = re.compile('^\s*TMS\s*=\s*(?P%s)\s*$'%a_float_re) - weight_label_alpsfact = re.compile('^\s*alpsfact\s*=\s*(?P%s)\s*$'%a_float_re, + weight_label_scale = re.compile(r'^\s*mur\s*=\s*(?P%s)'%a_float_re+\ + r'\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) + weight_label_PDF = re.compile(r'^\s*PDF\s*=\s*(?P\d+)\s*$') + weight_label_PDF_XML = re.compile(r'^\s*pdfset\s*=\s*(?P\d+)\s*$') + weight_label_TMS = re.compile(r'^\s*TMS\s*=\s*(?P%s)\s*$'%a_float_re) + weight_label_alpsfact = re.compile(r'^\s*alpsfact\s*=\s*(?P%s)\s*$'%a_float_re, re.IGNORECASE) - weight_label_scale_adv = re.compile('^\s*dyn\s*=\s*(?P%s)'%a_int_re+\ - '\s*mur\s*=\s*(?P%s)'%a_float_re+\ - '\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) - weight_label_PDF_adv = re.compile('^\s*PDF\s*=\s*(?P\d+)\s+(?P\S+)\s*$') + weight_label_scale_adv = re.compile(r'^\s*dyn\s*=\s*(?P%s)'%a_int_re+\ + r'\s*mur\s*=\s*(?P%s)'%a_float_re+\ + r'\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) + weight_label_PDF_adv = re.compile(r'^\s*PDF\s*=\s*(?P\d+)\s+(?P\S+)\s*$') class ParseError(MadGraph5Error): @@ -926,7 +926,7 @@ def get_HwU_source(self, print_header=True): res.append(' '.join('%+16.7e'%wgt for wgt in list(bin.boundaries))) res[-1] += ' '.join('%+16.7e'%bin.wgts[key] for key in self.bins.weight_labels if key not in ['central','stat_error']) - res.append('<\histogram>') + res.append(r'<\histogram>') return res def output(self, path=None, format='HwU', print_header=True): @@ -1149,6 +1149,8 @@ def parse_one_histo_from_stream(self, stream, all_weight_header, boundaries = [0.0,0.0] for j, weight in \ enumerate(HwU.histo_bin_weight_re.finditer(line_bin)): + if (j == len(weight_header)): + continue if j == len(all_weight_header): raise HwU.ParseError("There is more bin weights"+\ " specified than expected (%i)"%len(weight_header)) @@ -1803,7 +1805,7 @@ def parse_histos_from_PY8_XML_stream(self, stream, run_id=None, # Filter empty weights coming from the split weight_label_list = [wgt.strip() for wgt in str(selected_run_node.getAttribute('header')).split(';') if - not re.match('^\s*$',wgt)] + not re.match(r'^\s*$',wgt)] ordered_weight_label_list = [w for w in weight_label_list if w not\ in ['xmin','xmax']] # Remove potential repetition of identical weight labels @@ -1827,7 +1829,7 @@ def parse_histos_from_PY8_XML_stream(self, stream, run_id=None, all_weights = [] for wgt_position, wgt_label in \ enumerate(str(selected_run_node.getAttribute('header')).split(';')): - if not 
re.match('^\s*$',wgt_label) is None: + if not re.match(r'^\s*$',wgt_label) is None: continue all_weights.append({'POSITION':wgt_position}) for wgt_item in wgt_label.strip().split('_'): @@ -2714,7 +2716,7 @@ def ratio_no_correlations(wgtsA, wgtsB): # First the global gnuplot header for this histogram group global_header =\ -""" +r""" ################################################################################ ### Rendering of the plot titled '%(title)s' ################################################################################ @@ -2862,9 +2864,9 @@ def ratio_no_correlations(wgtsA, wgtsB): major_title = ', '.join(major_title) if not mu[0] in ['none',None]: - major_title += ', dynamical\_scale\_choice=%s'%mu[0] + major_title += r', dynamical\_scale\_choice=%s'%mu[0] if not pdf[0] in ['none',None]: - major_title += ', PDF=%s'%pdf[0].replace('_','\_') + major_title += ', PDF=%s'%pdf[0].replace('_',r'\_') # Do not show uncertainties for individual jet samples (unless first # or specified explicitely and uniquely) @@ -2937,7 +2939,7 @@ def ratio_no_correlations(wgtsA, wgtsB): plot_lines.append( "'%s' index %d using (($1+$2)/2):%d ls %d title '%s'"\ %(HwU_name,block_position+i,mu_var+3,color_index,\ -'%s dynamical\_scale\_choice=%s' % (title,mu[j]))) +r'%s dynamical\_scale\_choice=%s' % (title,mu[j]))) # And now PDF_variation if available if not PDF_var_pos is None: for j,PDF_var in enumerate(PDF_var_pos): @@ -2947,7 +2949,7 @@ def ratio_no_correlations(wgtsA, wgtsB): plot_lines.append( "'%s' index %d using (($1+$2)/2):%d ls %d title '%s'"\ %(HwU_name,block_position+i,PDF_var+3,color_index,\ -'%s PDF=%s' % (title,pdf[j].replace('_','\_')))) +'%s PDF=%s' % (title,pdf[j].replace('_',r'\_')))) # Now add the uncertainty lines, those not using a band so that they # are not covered by those using a band after we reverse plo_lines diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/launch_plugin.py index 7b10bedcef..9ec09eb71d 100644 --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/launch_plugin.py @@ -98,6 +98,7 @@ def default_setup(self): self['vector_size'] = 16 # already setup in default class (just change value) self['aloha_flag'] = '--fast-math' self['matrix_flag'] = '-O3' + self['limhel'] = 0 self.display_block.append('simd') self.display_block.append('psoptim') diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/lhe_parser.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/lhe_parser.py index eee9ba4522..f6e47956cd 100755 --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/lhe_parser.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/lhe_parser.py @@ -7,6 +7,7 @@ import numbers import math import time +import copy import os import shutil import sys @@ -101,7 +102,7 @@ def __init__(self, line=None, event=None): self.rwgt = 0 return - + self.event = event if event is not None: self.event_id = len(event) #not yet in the event @@ -1066,6 +1067,8 @@ def define_init_banner(self, wgt, lha_strategy, proc_charac=None): #special case for 1>N init_information = run_card.get_banner_init_information() event = next(self) + if not len(event): #if parse-momenta was false we have to parse the first event + event = Event(str(event)) init_information["idbmup1"] = event[0].pdg init_information["ebmup1"] = event[0].mass init_information["idbmup2"] = 0 @@ -1149,6 +1152,7 @@ def initialize_unweighting(self, getwgt, trunc_error): nb_keep = max(20, int(nb_event*trunc_error*15)) new_wgt = 
new_wgt[-nb_keep:] if nb_event == 0: + misc.sprint(i,f) raise Exception # store the information self.initial_nb_events[i] = nb_event @@ -1203,7 +1207,6 @@ def unweight(self, outputpath, get_wgt, **opts): (stop to write event when target is reached) """ - if isinstance(get_wgt, (str,six.text_type)): unwgt_name =get_wgt def get_wgt_multi(event): @@ -1483,12 +1486,12 @@ def reorder_mother_child(self): particle.mother2 -= 1 # re-call the function for the next potential change return self.reorder_mother_child() - - - - - + + + + + def parse_reweight(self): """Parse the re-weight information in order to return a dictionary {key: value}. If no group is define group should be '' """ @@ -1522,6 +1525,37 @@ def parse_nlo_weight(self, real_type=(1,11), threshold=None): threshold=threshold) return self.nloweight + def get_fks_pair(self, real_type=(1,11), threshold=None): + """ Gives the fks pair labels""" + start, stop = self.tag.find(''), self.tag.find('') + if start != -1 != stop: + text = self.tag[start+8:stop] + all_line = text.split('\n') + text = text.lower().replace('d','e') + all_line = text.split('\n') + for line in all_line: + data = line.split() + if len(data)>16: + wgt = OneNLOWeight(line, real_type=real_type) + return wgt.to_merge_pdg,wgt.nexternal + + def get_born_momenta(self,real_type=(1,11), threshold=None): + """ Gets the underlying n+1 body kinematics""" + start, stop = self.tag.find(''), self.tag.find('') + if start != -1 != stop: + text = self.tag[start+8:stop] + text = text.lower().replace('d','e') + all_line = text.split('\n') + for line in all_line: + data = line.split() + if len(data)>16: + wgt = OneNLOWeight(line, real_type=real_type) + nexternal = wgt.nexternal + real_momenta = all_line[2:2+nexternal] + return real_momenta + + + def rewrite_nlo_weight(self, wgt=None): """get the string associate to the weight""" @@ -1558,11 +1592,11 @@ def parse_lo_weight(self): return self.loweight if not hasattr(Event, 'loweight_pattern'): - Event.loweight_pattern = re.compile('''\s*(?P\d+)\s+(?P[\d.e+-]+)\s*\s*\n\s* - \s*(?P[\s\d.+-e]+)\s*\s*\n\s* - 1|2)["']?\>\s*(?P[\s\d.e+-]*)\s*\s*\n\s* - 1|2)["']?\>\s*(?P[\s\d.e+-]*)\s*\s*\n\s* - \s*(?P[\d.e+-]*)\s* + Event.loweight_pattern = re.compile('''\\s*(?P\\d+)\\s+(?P[\\d.e+-]+)\\s*\\s*\n\\s* + \\s*(?P[\\s\\d.+-e]+)\\s*\\s*\n\\s* + 1|2)["']?\\>\\s*(?P[\\s\\d.e+-]*)\\s*\\s*\n\\s* + 1|2)["']?\\>\\s*(?P[\\s\\d.e+-]*)\\s*\\s*\n\\s* + \\s*(?P[\\d.e+-]*)\\s* ''',re.X+re.I+re.M) start, stop = self.tag.find(''), self.tag.find('') @@ -1615,7 +1649,7 @@ def parse_matching_scale(self): self.matched_scale_data = [] - pattern = re.compile("") + pattern = re.compile(r"") data = re.split(pattern,self.tag) if len(data) == 1: return [] @@ -1623,7 +1657,7 @@ def parse_matching_scale(self): tmp = {} start,content, end = data self.tag = "%s%s" % (start, end) - pattern = re.compile("pt_clust_(\d*)=\"([\de+-.]*)\"") + pattern = re.compile("pt_clust_(\\d*)=\"([\\de+-.]*)\"") for id,value in pattern.findall(content): tmp[int(id)] = float(value) for i in range(1, len(self)+1): @@ -1647,7 +1681,7 @@ def parse_syscalc_info(self): return self.syscalc_data pattern = re.compile("|") - pattern2 = re.compile("<(?P[\w]*)(?:\s*(\w*)=[\"'](.*)[\"']\s*|\s*)>(.*)") + pattern2 = re.compile("<(?P[\\w]*)(?:\\s*(\\w*)=[\"'](.*)[\"']\\s*|\\s*)>(.*)") data = re.split(pattern,self.tag) if len(data) == 1: return [] @@ -1850,6 +1884,240 @@ def get_decay(self, pdg_code=0, event_id=None): return new_event + + def set_initial_mass_to_zero(self): + """set the masses of the initial particles 
to zero, by reshuffling the respective momenta + Works only in the **partonic** com frame, so the event must be boosted to such frame + before calling the function + """ + + if not misc.equal(self[0].px, 0) or not misc.equal(self[1].px, 0) or \ + not misc.equal(self[0].py, 0) or not misc.equal(self[1].py, 0) or \ + not misc.equal(self[0].pz, - self[1].pz, zero_limit=False): + misc.sprint(self[0]) + misc.sprint(self[1]) + raise Exception('momenta should be in the partonic center of mass frame') + + self[0].mass = 0. + self[1].mass = 0. + tot_E=0. + for ip,part in enumerate(self): + if part.status == 1 : + tot_E += part.E + if (self[0].pz > 0. and self[1].pz < 0): + self[0].set_momentum(FourMomentum([tot_E/2., 0., 0., tot_E/2.])) + self[1].set_momentum(FourMomentum([tot_E/2., 0., 0., -tot_E/2.])) + elif (self[0].pz < 0. and self[1].pz > 0): + self[0].set_momentum(FourMomentum([tot_E/2., 0., 0., -tot_E/2.])) + self[1].set_momentum(FourMomentum([tot_E/2., 0., 0., tot_E/2.])) + else: + logger.critical('ERROR: two incoming partons not back.-to-back') + + def set_final_jet_mass_to_zero(self): + """set the final light particle masses to zero + """ + + for ip,part in enumerate(self): + if ((abs(part.pid) <= 5) or (abs(part.pid) == 11) or (abs(part.pid) == 12)) and (part.status == 1): + part.mass = 0. + E_1_new = math.sqrt(part.mass**2 + part.px**2 + part.py**2 + part.pz**2) + part.set_momentum(FourMomentum([E_1_new, part.px, part.py, part.pz])) + + + + def merge_particles_kinematics(self, i,j, moth): + """Map to an underlying n-body kinematics for two given + particles i,j to be merged and a resulting moth""" + """ note! kinematics (and id) mapping only! """ + + recoil = True + fks_type = False + + if recoil and not fks_type: + if (i == moth[0].get('number')-1): + fks_i = i + fks_j = j + elif (j == moth[0].get('number')-1): + fks_i = j + fks_j = i + to_remove = fks_j + + merge_i = self[fks_i] + merge_j = self[fks_j] + + i_4mom = FourMomentum(merge_i) + j_4mom = FourMomentum(merge_j) + if (fks_i <= 1): + sign1 = -1.0 + else: + sign1 = 1.0 + mother_4mom = i_4mom + sign1*j_4mom + + new_event = copy.deepcopy(self) + + self[fks_i].pid = moth[0]['id'] + self[fks_i].set_momentum(mother_4mom) + + if fks_i <= 1: # initial-state recoil + new_p = FourMomentum() + for ip,part in enumerate(self): + if (ip != fks_i and ip != fks_j and ip >= 2): + new_p += part + + if fks_i == 0: + self[1].set_momentum(new_p - FourMomentum(self[0])) + elif fks_i == 1: + self[0].set_momentum(new_p - FourMomentum(self[1])) + + pz_1_new = self.recoil_eq(self[0],self[1]) + pz_2_new = self[0].pz + self[1].pz - pz_1_new + E_1_new = math.sqrt(self[0].mass**2 + self[0].px**2 + self[0].py**2 + pz_1_new **2) + E_2_new = math.sqrt(self[1].mass**2 + self[1].px**2 + self[1].py**2 + pz_2_new **2) + self[0].set_momentum(FourMomentum([E_1_new,self[0].px,self[0].py,pz_1_new])) + self[1].set_momentum(FourMomentum([E_2_new,self[1].px,self[1].py,pz_2_new])) + self.pop(to_remove) + + if fks_i > 1: # final-state recoil + + # Re-scale the energy of fks_i to make it on-shell + for ip,part in enumerate(self): + if (ip == fks_i): + part.E = math.sqrt(part.mass**2 + part.px**2 + part.py**2 + part.pz**2) + new_p.E = part.E + + # Find the overall energy in the final state + new_p.E = 0.0 + for ip,part in enumerate(self): + if (ip != fks_j and ip >= 2): + new_p.E += part.E + + # Use one of the initial states to absorb the energy change in the final state + self[1].set_momentum(FourMomentum([new_p.E-self[0].E,self[1].px,self[1].py,self[1].pz])) + + # Change 
the initial state pz and E + pz_1_new = self.recoil_eq(self[0],self[1]) + pz_2_new = self[0].pz + self[1].pz - pz_1_new + E_1_new = math.sqrt(self[0].mass**2 + self[0].px**2 + self[0].py**2 + pz_1_new **2) + E_2_new = math.sqrt(self[1].mass**2 + self[1].px**2 + self[1].py**2 + pz_2_new **2) + self[0].set_momentum(FourMomentum([E_1_new,self[0].px,self[0].py,pz_1_new])) + self[1].set_momentum(FourMomentum([E_2_new,self[1].px,self[1].py,pz_2_new])) + self.pop(to_remove) + + elif fks_type and not recoil: + ## Do it in a more FKS-style + if (i == moth[0].get('number')-1): + fks_i = i + fks_j = j + elif (j == moth[0].get('number')-1): + fks_i = j + fks_j = i + to_remove = fks_j + new_event = copy.copy(event) + + if fks_i <= 1: # initial-state recoil + + # First boost to partonic CM frame + q = FourMomentum(self[0])+FourMomentum(self[1]) + for ip,part in enumerate(self): + vec = FourMomentum(part) + self[ip].set_momentum(vec.zboost(pboost=q)) + + k_tot = FourMomentum([self[0].E+self[1].E-self[fks_j].E,self[0].px+self[1].px-self[fks_j].px,\ + self[0].py+self[1].py-self[fks_j].py,self[0].pz+self[1].pz-self[fks_j].pz]) + + final = FourMomentum([0,0,0,0]) + for ip,part in enumerate(self): + vec = FourMomentum([part.E,part.px,part.py,part.pz]) + if (ip != fks_i and ip != fks_j and ip >= 2): + final = final + vec + + s = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz])**2 + ksi = self[fks_j].E/(math.sqrt(s)/2.0) + y = self[fks_j].pz/self[fks_j].E + + self[0].pz = self[0].pz * math.sqrt(1.0-ksi)*math.sqrt((2.0-ksi*(1.0+y))/((2.0-ksi*(1.0-y)))) + self[0].E = math.sqrt(self[0].mass**2 + self[0].pz**2) + self[1].pz = self[1].pz * math.sqrt(1.0-ksi)*math.sqrt((2.0-ksi*(1.0-y))/((2.0-ksi*(1.0+y)))) + self[1].E = math.sqrt(self[1].mass**2 + self[1].pz**2) + + final = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz]) + + k_tot_1 = k_tot.zboost(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + k_tot_2 = k_tot_1.pt_boost(pboost=FourMomentum([k_tot_1.E,k_tot_1.px,k_tot_1.py,k_tot_1.pz])) + k_tot_3 = k_tot_2.zboost_inv(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + + for ip,part in enumerate(self): + if (ip >= 2): + vec = FourMomentum([part.E,part.px,part.py,part.pz]) + vec2 = vec.zboost(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + vec3 = vec2.pt_boost(pboost=FourMomentum([k_tot_1.E,k_tot_1.px,k_tot_1.py,k_tot_1.pz])) + vec_new = vec3.zboost_inv(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + self[ip].set_momentum(FourMomentum([vec_new.E,vec_new.px,vec_new.py,vec_new.pz])) + + self.pop(to_remove) + + else: # final-state recoil + q = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz]) + + for ip,part in enumerate(self): + vec = FourMomentum([part.E,part.px,part.py,part.pz]) + self[ip].set_momentum(vec.zboost(pboost=q)) + + q = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz]) + + k = FourMomentum([self[fks_i].E+self[fks_j].E,self[fks_i].px+self[fks_j].px,\ + self[fks_i].py+self[fks_j].py,self[fks_i].pz+self[fks_j].pz]) + + k_rec = FourMomentum([0,0,0,0]) + for ip,part in enumerate(self): + if ip >= 2 and ip != fks_i and ip != fks_j: # add only final-states to the recoil and not the FKS pair + k_rec = k_rec + FourMomentum([part.E,part.px,part.py,part.pz]) + + k_mom = math.sqrt(k_rec.px**2 + k_rec.py**2 + k_rec.pz**2) + beta = (q**2 - 
(k_rec.E+k_mom)**2)/(q**2 + (k_rec.E+k_mom)**2) + for ip,part in enumerate(self): + if ip >= 2 and ip != fks_i and ip != fks_j: + vec = FourMomentum([self[ip].E,self[ip].px,self[ip].py,self[ip].pz]) + self[ip].set_momentum(vec.boost_beta(beta,k_rec)) + if ip == fks_i: + self[ip].set_momentum(q - k_rec.boost_beta(beta,k_rec)) + self.pop(to_remove) + else: + logger.info('Error in Sudakov Born mapping: no recoil scheme found!') + + def recoil_eq(self,part1, part2): + """ In general, solves the equation + E1 + E2 = K + p1 + p2 = c + E1^2 - p1^2 = a + E2^2 - p2^2 = b + and returns p1 + """ + thresh = 1e-6 + import random + a = part1.mass**2 + part1.px**2 + part1.py**2 + b = part2.mass**2 + part2.px**2 + part2.py**2 + c = part1.pz + part2.pz + K = part1.E + part2.E + K2 = K**2 + sol1 = (-a*c + b*c + c**3 - c*K2 - math.sqrt(K2*(a**2 + (b + c**2 - K2)**2 - 2*a*(b - c**2 + K2))))/(2*(c**2-K2)) + sol2 = (-a*c + b*c + c**3 - c*K2 + math.sqrt(K2*(a**2 + (b + c**2 - K2)**2 - 2*a*(b - c**2 + K2))))/(2*(c**2-K2)) + + if abs(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2) - (math.sqrt(a+sol2**2) + math.sqrt(b+(c-sol2)**2))) > thresh: + logger.critical('Error in recoil_eq solver 1') + logger.critical(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2)) + logger.critical(math.sqrt(a+sol2**2) + math.sqrt(b+(c-sol2)**2)) + if abs(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2) - K) > thresh: + logger.critical('Error in recoil_eq solver 2') + logger.critical(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2)) + logger.critical(K) + return sol1 + + def boost(self, filter=None): """modify the current event to boost it according to the current filter""" if filter is None: @@ -1861,7 +2129,7 @@ def boost(self, filter=None): if list(filter(p)): pboost += p else: - pboost = FourMomentum(pboost) + pboost = FourMomentum(filter) # change sign of three-component due to helas convention pboost.px *=-1 @@ -1877,7 +2145,7 @@ def check(self): """check various property of the events""" # check that relative error is under control - threshold = 1e-6 + threshold = 1e-4 #1. Check that the 4-momenta are conserved E, px, py, pz = 0,0,0,0 @@ -1920,7 +2188,50 @@ def check(self): self.check_color_structure() #3. check mass - + + def check_kinematics_only(self): + """check various property of the events - only kinematics""" + + # check that relative error is under control + threshold = 1e-3 + + #1. 
Check that the 4-momenta are conserved + E, px, py, pz = 0,0,0,0 + absE, abspx, abspy, abspz = 0,0,0,0 + for particle in self: + coeff = 1 + if particle.status == -1: + coeff = -1 + elif particle.status != 1: + continue + E += coeff * particle.E + absE += abs(particle.E) + px += coeff * particle.px + py += coeff * particle.py + pz += coeff * particle.pz + abspx += abs(particle.px) + abspy += abs(particle.py) + abspz += abs(particle.pz) + # check mass + fourmass = FourMomentum(particle).mass + + if particle.mass and (abs(particle.mass) - fourmass)/ abs(particle.mass) > threshold: + logger.critical(self) + raise Exception( "Do not have correct mass lhe: %s momentum: %s (error at %s" % (particle.mass, fourmass, (abs(particle.mass) - fourmass)/ abs(particle.mass))) + + if abs(E/absE) > threshold: + logger.critical(self) + raise Exception("Do not conserve Energy %s, %s" % (E/absE, E)) + if abs(px/abspx) > threshold: + logger.critical(self) + raise Exception("Do not conserve Px %s, %s" % (px/abspx, px)) + if abs(py/abspy) > threshold: + logger.critical(self) + raise Exception("Do not conserve Py %s, %s" % (py/abspy, py)) + if abs(pz/abspz) > threshold: + logger.critical(self) + raise Exception("Do not conserve Pz %s, %s" % (pz/abspz, pz)) + def assign_scale_line(self, line, convert=True): """read the line corresponding to global event line @@ -2764,7 +3075,7 @@ def zboost(self, pboost=None, E=0, pz=0): if isinstance(pboost, FourMomentum): E = pboost.E pz = pboost.pz - + #beta = pz/E gamma = E / math.sqrt(E**2-pz**2) gammabeta = pz / math.sqrt(E**2-pz**2) @@ -2778,6 +3089,74 @@ def zboost(self, pboost=None, E=0, pz=0): out.pz = 0 return out + def zboost_inv(self, pboost=None, E=0, pz=0): + """Both momenta should be in the same frame. + The boost perform correspond to the boost required to set pboost at + rest (only z boost applied). + """ + if isinstance(pboost, FourMomentum): + E = pboost.E + pz = pboost.pz + + #beta = pz/E + gamma = E / math.sqrt(E**2-pz**2) + gammabeta = pz / math.sqrt(E**2-pz**2) + + out = FourMomentum([gamma*self.E + gammabeta*self.pz, + self.px, + self.py, + gamma*self.pz + gammabeta*self.E]) + + if abs(out.pz) < 1e-6 * out.E: + out.pz = 0 + return out + + + def pt_boost(self, pboost=None, E=0, pz=0): + """Both momenta should be in the same frame. + The boost perform correspond to the boost required to set pboost at + rest (only pT boost applied). 
+ """ + + if isinstance(pboost, FourMomentum): + E = pboost.E + px = pboost.px + py = pboost.py + mass = math.sqrt(E**2 - px**2 - py**2) + + betax = px/E + betay = py/E + beta = math.sqrt(betax**2+betay**2) + gamma = 1 / math.sqrt(1.0-beta**2) + + out = FourMomentum([gamma*self.E - gamma*betax*self.px - gamma*betay*self.py, + -gamma*betax*self.E + (1.0 + (gamma-1.0)*betax**2/(beta**2))*self.px + (gamma-1.0)*betax*betay/(beta**2)*self.py, + -gamma*betay*self.E + ((gamma-1.0)*betax*betay/(beta**2))*self.px + (1.0+(gamma-1.0)*(betay**2)/(beta**2))*self.py, + self.pz]) + + if abs(out.px) < 1e-6 * out.E: + out.px = 0 + if abs(out.py) < 1e-6 * out.E: + out.py = 0 + return out + + def boost_beta(self,beta,mom): + """ Boost along the three-momentum of mom with a boost of size beta""" + + unit = mom * (1.0/math.sqrt(mom.px**2+mom.py**2+mom.pz**2)) + beta_vec = beta*unit + bx = beta_vec.px + by = beta_vec.py + bz = beta_vec.pz + gamma = 1.0 / math.sqrt(1.0-beta**2) + + out = FourMomentum([gamma*self.E - gamma*bx*self.px - gamma*by*self.py - gamma*bz*self.pz, + -gamma*bx*self.E + (1.0 + (gamma-1.0)*bx**2/(beta**2))*self.px + (gamma-1.0)*bx*by/(beta**2)*self.py + (gamma-1.0)*bx*bz/(beta**2)*self.pz, + -gamma*by*self.E + ((gamma-1.0)*bx*by/(beta**2))*self.px + (1.0+(gamma-1.0)*(by**2)/(beta**2))*self.py + (gamma-1.0)*by*bz/(beta**2)*self.pz, + -gamma*bz*self.E + (gamma-1.0)*bx*bz/(beta**2)*self.px + (gamma-1.0)*(by*bz)/(beta**2)*self.py + (1.0+(gamma-1.0)*bz**2/(beta**2))*self.pz]) + + return out + def boost_to_restframe(self, pboost): """apply the boost transformation such that pboost is at rest in the new frame. First apply a rotation to allign the pboost to the z axis and then use @@ -2789,27 +3168,64 @@ def boost_to_restframe(self, pboost): return out - # write pboost as (E, p cosT sinF, p sinT sinF, p cosF) - # rotation such that it become (E, 0 , 0 , p ) is - # cosT sinF , -sinT , cosT sinF - # sinT cosF , cosT , sinT sinF - # -sinT , 0 , cosF - p = math.sqrt( pboost.px**2 + pboost.py**2+ pboost.pz**2) - cosF = pboost.pz / p - sinF = math.sqrt(1-cosF**2) - sinT = pboost.py/p/sinF - cosT = pboost.px/p/sinF - - out=FourMomentum([self.E, - self.px*cosT*cosF + self.py*sinT*cosF-self.pz*sinF, - -self.px*sinT+ self.py*cosT, - self.px*cosT*sinF + self.py*sinT*sinF + self.pz*cosF - ]) - out = out.zboost(E=pboost.E,pz=p) + # see here https://physics.stackexchange.com/questions/749036/general-lorentz-boost-of-four-momentum-in-cm-frame-particle-physics + vx = pboost.px/pboost.E + vy = pboost.py/pboost.E + vz = pboost.pz/pboost.E + v = pboost.norm/pboost.E + v2 = pboost.norm_sq/pboost.E**2 + gamma = 1./math.sqrt(1.-v**2) + gammo = gamma-1. 
+ out = FourMomentum(E = gamma*(self.E - vx*self.px - vy*self.py - vz*self.pz), + px= -gamma*vx*self.E + (1+gammo*vx**2/v2)*self.px + gammo*vx*vy/v2*self.py + gammo*vx*vz/v2*self.pz, + py= -gamma*vy*self.E + gammo*vy*vx/v2*self.px + (1+gammo*vy**2/v2)*self.py + gammo*vy*vz/v2*self.pz, + pz= -gamma*vz*self.E + gammo*vz*vx/v2*self.px + gammo*vz*vy/v2*self.py + (1+gammo*vz**2/v2)*self.pz) + return out + def rotate_to_z(self,prot): + + import math + import numpy as np + + z = np.array([0.,0.,1.]) + + px = self.px + py = self.py + pz = self.pz + + refx = prot.px + refy = prot.py + refz = prot.pz + + prot_mom = np.array([px, py, pz]) + ref_mom = np.array([refx, refy, refz]) + + # Create normal vector + n = np.array([refy, -refx, 0.]) + n = n * 1./math.sqrt(self.threedot(n,n)) + t = prot_mom - self.threedot(n,prot_mom)*n + p = ref_mom - self.threedot(ref_mom,z)*z + p = p/math.sqrt(self.threedot(p,p)) + + t_pz = np.array([self.threedot(t,p), self.threedot(t,z), 0.]) + costheta = self.threedot(ref_mom,z)* 1./math.sqrt(self.threedot(ref_mom, ref_mom)) + sintheta=math.sqrt(1.-costheta**2) + + sgn = 1. + t_pz_p = np.array([0., 0., 0.]) + t_pz_p[0] = costheta*t_pz[0] + sgn*(-sintheta) * t_pz[1] + t_pz_p[1] = sgn*sintheta*t_pz[0] + costheta * t_pz[1] + + out_mom = self.threedot(n,prot_mom)*n + t_pz_p[0]*p + t_pz_p[1]*z + + out = FourMomentum([self.E,out_mom[0], out_mom[1], out_mom[2] ] ) + + return out - + def threedot(self,a,b): + + return a[0]*b[0]+a[1]*b[1]+a[2]*b[2] class OneNLOWeight(object): diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/madevent_interface.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/madevent_interface.py index 2a118e21bf..8e30cf690c 100755 --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/madevent_interface.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/madevent_interface.py @@ -496,7 +496,6 @@ def help_remove(self): logger.info(" the optional '-f' allows to by-pass all security question") logger.info(" The banner can be remove only if all files are removed first.") - class AskRun(cmd.ControlSwitch): """a class for the question on what to do on a madevent run""" @@ -2393,13 +2392,17 @@ def do_generate_events(self, line): # Check argument's validity mode = self.check_generate_events(args) switch_mode = self.ask_run_configuration(mode, args) - if not args: - # No run name assigned -> assigned one automaticaly - self.set_run_name(self.find_available_run_name(self.me_dir), None, 'parton') - else: - self.set_run_name(args[0], None, 'parton', True) - args.pop(0) - + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + run_card = banner_mod.RunCard(pjoin(self.me_dir, 'Cards', 'run_card.dat'), consistency=False) + if not run_card.scan_set: + if not args: + # No run name assigned -> assigned one automaticaly + self.set_run_name(self.find_available_run_name(self.me_dir), None, 'parton') + else: + self.set_run_name(args[0], None, 'parton', True) + args.pop(0) + + self.run_generate_events(switch_mode, args) self.postprocessing() @@ -2560,7 +2563,7 @@ def wait_monitoring(Idle, Running, Done): self.update_status("postprocessing contur done", level="rivet") # this decorator handle the loop related to scan. 
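[Annotation, not part of the patch.] The FourMomentum helpers added in lhe_parser.py above (zboost_inv, pt_boost, boost_beta, and the rewritten boost_to_restframe) are all instances of the general pure Lorentz boost with velocity v = p/E, per the stackexchange derivation linked in the code. A self-contained numerical check of the rest-frame property; this is an illustrative re-implementation on plain tuples, not the lhe_parser API:

import math

def boost_to_restframe(p, pboost):
    """General pure boost that puts pboost at rest (sketch of the formula
    used by the new FourMomentum.boost_to_restframe; tuples, not the class)."""
    E, px, py, pz = p
    bE, bx, by, bz = pboost
    vx, vy, vz = bx / bE, by / bE, bz / bE      # boost velocity v = p/E
    v2 = vx * vx + vy * vy + vz * vz
    gamma = 1.0 / math.sqrt(1.0 - v2)
    pdotv = vx * px + vy * py + vz * pz
    coef = (gamma - 1.0) * pdotv / v2 - gamma * E
    return (gamma * (E - pdotv), px + coef * vx, py + coef * vy, pz + coef * vz)

# Boosting pboost by its own velocity must give (m, 0, 0, 0), m the invariant mass.
pb = (5.0, 1.0, 2.0, 3.0)
m = math.sqrt(pb[0] ** 2 - sum(x * x for x in pb[1:]))
E, px, py, pz = boost_to_restframe(pb, pb)
assert abs(E - m) < 1e-9 and all(abs(x) < 1e-9 for x in (px, py, pz))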
- @common_run.scanparamcardhandling() + @common_run.scanparamcardhandling(run_card_scan=True) def run_generate_events(self, switch_mode, args): if self.proc_characteristics['loop_induced'] and self.options['run_mode']==0: @@ -2593,7 +2596,6 @@ def run_generate_events(self, switch_mode, args): # Regular run mode logger.info('Generating %s events with run name %s' % (self.run_card['nevents'], self.run_name)) - self.exec_cmd('survey %s %s' % (self.run_name,' '.join(args)), postcmd=False) nb_event = self.run_card['nevents'] @@ -2975,7 +2977,7 @@ def update_width_in_param_card(decay_info, initial=None, output=None): particle = 0 # Read BRs for this decay line = param_card[line_number] - while re.search('^(#|\s|\d)', line): + while re.search(r'^(#|\s|\d)', line): line = param_card.pop(line_number) if not particle or line.startswith('#'): line=param_card[line_number] @@ -3226,7 +3228,7 @@ def do_treatcards(self, line, mode=None, opt=None): for line in open(pjoin(bias_module_path,'%s.f'%os.path.basename(bias_module_path))): if start and last: break - if not start and not re.search('c\s*parameters\s*=\s*{',line, re.I): + if not start and not re.search(r'c\s*parameters\s*=\s*{',line, re.I): continue start = True if not line.startswith('C'): @@ -3235,7 +3237,7 @@ def do_treatcards(self, line, mode=None, opt=None): if '{' in line: line = line.split('{')[-1] # split for } ! # - split_result = re.split('(\}|!|\#)', line,1, re.M) + split_result = re.split(r'(\}|!|\#)', line,1, re.M) line = split_result[0] sep = split_result[1] if len(split_result)>1 else None if sep == '}': @@ -3514,8 +3516,8 @@ def pass_in_difficult_integration_mode(self, rate=1): text = open(conf_path).read() min_evt, max_evt = 2500 *(2+rate), 10000*(rate+1) - text = re.sub('''\(min_events = \d+\)''', '(min_events = %i )' % min_evt, text) - text = re.sub('''\(max_events = \d+\)''', '(max_events = %i )' % max_evt, text) + text = re.sub(r'''\(min_events = \d+\)''', '(min_events = %i )' % min_evt, text) + text = re.sub(r'''\(max_events = \d+\)''', '(max_events = %i )' % max_evt, text) fsock = open(conf_path, 'w') fsock.write(text) fsock.close() @@ -3619,7 +3621,7 @@ def do_refine(self, line): alljobs = misc.glob('ajob*', Pdir) #remove associated results.dat (ensure to not mix with all data) - Gre = re.compile("\s*j=(G[\d\.\w]+)") + Gre = re.compile(r"\s*j=(G[\d\.\w]+)") for job in alljobs: Gdirs = Gre.findall(open(job).read()) for Gdir in Gdirs: @@ -3727,58 +3729,126 @@ def do_combine_events(self, line): sum_xsec, sum_xerru, sum_axsec = 0,[],0 Gdirs = self.get_Gdir() Gdirs.sort() - for Gdir in Gdirs: - if os.path.exists(pjoin(Gdir, 'events.lhe')): - result = sum_html.OneResult('') - result.read_results(pjoin(Gdir, 'results.dat')) - sum_xsec += result.get('xsec') - sum_xerru.append(result.get('xerru')) - sum_axsec += result.get('axsec') - - if self.run_card['gridpack'] or self.run_card['nevents']==0: - os.remove(pjoin(Gdir, 'events.lhe')) - continue + partials_info = [] + try: + p = subprocess.Popen(["ulimit", "-n"], stdout=subprocess.PIPE) + out, err = p.communicate() + max_G = out.decode() + if max_G == "unlimited": + max_G =2500 + else: + max_G = int(max_G) - 40 + except Exception as error: + logger.debug(error) + max_G = 80 # max(20, len(Gdirs)/self.options['nb_core']) - AllEvent.add(pjoin(Gdir, 'events.lhe'), - result.get('xsec'), - result.get('xerru'), - result.get('axsec') - ) - - if len(AllEvent) >= 80: #perform a partial unweighting - AllEvent.unweight(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % partials), - 
get_wgt, log_level=5, trunc_error=1e-2, event_target=self.run_card['nevents']) - AllEvent = lhe_parser.MultiEventFile() - AllEvent.banner = self.banner - AllEvent.add(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % partials), - sum_xsec, - math.sqrt(sum(x**2 for x in sum_xerru)), - sum_axsec) - partials +=1 - if not hasattr(self,'proc_characteristic'): self.proc_characteristic = self.get_characteristics() - if len(AllEvent) == 0: - nb_event = 0 - else: + mycluster = cluster.MultiCore(nb_core=self.options['nb_core']) + + def split(a, n): + """split a list "a" into n chunk of same size (or nearly same size)""" + k, m = divmod(len(a), n) + return (a[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n)) + + partials_info = [] + if len(Gdirs) >= max_G: + start_unweight= time.perf_counter() + # first check in how many chunk we have to split (always use a multiple of nb_core) + nb_split = 1 + nb_G = len(Gdirs) // (2* self.options['nb_core']) + while nb_G > min(80, max_G): + nb_split += 1 + nb_G = len(Gdirs)//(nb_split*2*self.options['nb_core']) + if nb_G < 10: + nb_split -=1 + nb_G = len(Gdirs)//(nb_split*2*self.options['nb_core']) + + #enforce at least 10 directory per thread + if nb_G > 10 or nb_split>1: + # do the unweighting of each chunk on their own thread + nb_chunk = (nb_split*2*self.options['nb_core']) + else: + nb_chunk = len(Gdirs) // 10 + nb_G =10 + + # security that the number of combine events is too large + if nb_chunk >= max_G: + nb_chunk = max_G -1 + nb_G = len(Gdirs) // nb_chunk + + for i, local_G in enumerate(split(Gdirs, nb_chunk)): + line = [pjoin(self.me_dir, "Events", self.run_name, "partials%d.lhe.gz" % i)] + line.append(pjoin(self.me_dir, 'Events', self.run_name, '%s_%s_banner.txt' % (self.run_name, tag))) + line.append(str(self.results.current['cross'])) + line += local_G + partials_info.append(self.do_combine_events_partial(' '.join(line), preprocess_only=True)) + mycluster.submit(sys.executable, + [pjoin(self.me_dir, 'bin', 'internal', 'madevent_interface.py'), 'combine_events_partial'] + line, + stdout='/dev/null' + ) + + starttime = time.time() + update_status = lambda idle, run, finish: \ + self.update_status((idle, run, finish, 'unweight'), level=None, + force=False, starttime=starttime) + mycluster.wait(self.me_dir, update_status) + # do the final combination + for data in partials_info: + AllEvent.add(*data) + + start_unweight= time.perf_counter() nb_event = AllEvent.unweight(pjoin(self.me_dir, "Events", self.run_name, "unweighted_events.lhe.gz"), get_wgt, trunc_error=1e-2, event_target=self.run_card['nevents'], log_level=logging.DEBUG, normalization=self.run_card['event_norm'], proc_charac=self.proc_characteristic) + + #cleaning + for data in partials_info: + path = data[0] + try: + os.remove(path) + except Exception as error: + try: + os.remove(path[:-3]) # try without the .gz + except: + misc.sprint('no file ', path, 'to clean') + else: + for Gdir in Gdirs: + if os.path.exists(pjoin(Gdir, 'events.lhe')): + result = sum_html.OneResult('') + result.read_results(pjoin(Gdir, 'results.dat')) + sum_xsec += result.get('xsec') + sum_xerru.append(result.get('xerru')) + sum_axsec += result.get('axsec') + + if self.run_card['gridpack'] or self.run_card['nevents']==0: + os.remove(pjoin(Gdir, 'events.lhe')) + continue + + AllEvent.add(pjoin(Gdir, 'events.lhe'), + result.get('xsec'), + result.get('xerru'), + result.get('axsec') + ) + + if len(AllEvent) == 0: + nb_event = 0 + else: + nb_event = AllEvent.unweight(pjoin(self.me_dir, "Events", self.run_name, 
"unweighted_events.lhe.gz"), + get_wgt, trunc_error=1e-2, event_target=self.run_card['nevents'], + log_level=logging.DEBUG, normalization=self.run_card['event_norm'], + proc_charac=self.proc_characteristic) + + if nb_event < self.run_card['nevents']: + logger.warning("failed to generate enough events. Please follow one of the following suggestions to fix the issue:") + logger.warning(" - set in the run_card.dat 'sde_strategy' to %s", 1 + self.run_card['sde_strategy'] % 2) + logger.warning(" - set in the run_card.dat 'hard_survey' to 1 or 2.") + logger.warning(" - reduce the number of requested events (if set too high)") + logger.warning(" - check that you do not have -integrable- singularity in your amplitude.") + logger.warning(" - regenerate your process directory by selecting another gauge (in particular try FD gauge).") - if nb_event < self.run_card['nevents']: - logger.warning("failed to generate enough events. Please follow one of the following suggestions to fix the issue:") - logger.warning(" - set in the run_card.dat 'sde_strategy' to %s", 1 + self.run_card['sde_strategy'] % 2) - logger.warning(" - set in the run_card.dat 'hard_survey' to 1 or 2.") - logger.warning(" - reduce the number of requested events (if set too high)") - logger.warning(" - check that you do not have -integrable- singularity in your amplitude.") - if partials: - for i in range(partials): - try: - os.remove(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % i)) - except Exception: - os.remove(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe" % i)) self.results.add_detail('nb_event', nb_event) @@ -3789,7 +3859,50 @@ def do_combine_events(self, line): logger.info("combination of events done in %s s ", time.time()-start) self.to_store.append('event') + + ############################################################################ + def do_combine_events_partial(self, line, preprocess_only=False): + """ """ + + AllEvent = lhe_parser.MultiEventFile() + + sum_xsec, sum_xerru, sum_axsec = 0,[],0 + output, banner_path,cross = line.split()[:3] + Gdirs = line.split()[3:] + + cross = float(cross) + if not self.banner: + self.banner = banner_mod.Banner(banner_path) + if not hasattr(self, 'run_card'): + self.run_card = banner_mod.RunCard(self.banner['mgruncard']) + AllEvent.banner = self.banner + + for Gdir in Gdirs: + if os.path.exists(pjoin(Gdir, 'events.lhe')): + result = sum_html.OneResult('') + result.read_results(pjoin(Gdir, 'results.dat')) + sum_xsec += result.get('xsec') + sum_xerru.append(result.get('xerru')) + sum_axsec += result.get('axsec') + + if self.run_card['gridpack'] or self.run_card['nevents']==0: + os.remove(pjoin(Gdir, 'events.lhe')) + continue + if not preprocess_only: + AllEvent.add(pjoin(Gdir, 'events.lhe'), + result.get('xsec'), + result.get('xerru'), + result.get('axsec') + ) + if preprocess_only: + return output, sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), sum_axsec + nb_event = max(min(abs(1.01*self.run_card['nevents']*sum_axsec/cross),self.run_card['nevents']), 10) + get_wgt = lambda event: event.wgt + AllEvent.unweight(output, + get_wgt, log_level=5, trunc_error=1e-2, event_target=nb_event) + return output, sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), sum_axsec + ############################################################################ def correct_bias(self): """check the first event and correct the weight by the bias @@ -3902,13 +4015,19 @@ def do_store_events(self, line): #except Exception: # continue # Store log - try: - if 
os.path.exists(pjoin(G_path, 'log.txt')): - input = pjoin(G_path, 'log.txt') + input = pjoin(G_path, 'log.txt') + if os.path.exists(input): + if self.run_card['keep_log'] not in ["none", "minimal"]: output = pjoin(G_path, '%s_log.txt' % run) - files.mv(input, output) - except Exception: - continue + try: + files.mv(input, output) + except Exception: + continue + elif self.run_card['keep_log'] == "none": + try: + os.remove(input) + except Exception: + continue #try: # # Grid # for name in ['ftn26']: @@ -3989,7 +4108,7 @@ def do_create_gridpack(self, line): misc.call(['./bin/internal/make_gridpack'], cwd=self.me_dir) files.mv(pjoin(self.me_dir, 'gridpack.tar.gz'), pjoin(self.me_dir, '%s_gridpack.tar.gz' % self.run_name)) - os.system("sed -i.bak \"s/\s*.true.*=.*GridRun/ .false. = GridRun/g\" %s/Cards/grid_card.dat" \ + os.system("sed -i.bak \"s/\\s*.true.*=.*GridRun/ .false. = GridRun/g\" %s/Cards/grid_card.dat" \ % self.me_dir) self.update_status('gridpack created', level='gridpack') @@ -4476,7 +4595,7 @@ def do_pythia8(self, line): else: preamble = misc.get_HEPTools_location_setter( pjoin(MG5DIR,'HEPTools'),'lib') - preamble += "\n unset PYTHIA8DATA\n" + #preamble += "\n unset PYTHIA8DATA\n" open(pythia_cmd_card,'w').write("""! ! It is possible to run this card manually with: @@ -4691,7 +4810,7 @@ def do_pythia8(self, line): # Make sure to sure the number of split_events determined during the splitting. split_PY8_Card.systemSet('Main:numberOfEvents',partition_for_PY8[i]) split_PY8_Card.systemSet('HEPMCoutput:scaling',split_PY8_Card['HEPMCoutput:scaling']* - (float(partition_for_PY8[i])/float(n_events))) + (float(partition_for_PY8[i]))) # Add_missing set to False so as to be sure not to add any additional parameter w.r.t # the ones in the original PY8 param_card copied. split_PY8_Card.write(pjoin(parallelization_dir,'PY8Card_%d.dat'%i), @@ -4963,9 +5082,9 @@ def wait_monitoring(Idle, Running, Done): if cross_sections: # Filter the cross_sections specified an keep only the ones # with central parameters and a different merging scale - a_float_re = '[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' + a_float_re = r'[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' central_merging_re = re.compile( - '^\s*Weight_MERGING\s*=\s*(?P%s)\s*$'%a_float_re, + r'^\s*Weight_MERGING\s*=\s*(?P%s)\s*$'%a_float_re, re.IGNORECASE) cross_sections = dict( (float(central_merging_re.match(xsec).group('merging')),value) @@ -5016,8 +5135,8 @@ def wait_monitoring(Idle, Running, Done): def parse_PY8_log_file(self, log_file_path): """ Parse a log file to extract number of event and cross-section. 
""" - pythiare = re.compile("Les Houches User Process\(es\)\s*\d+\s*\|\s*(?P\d+)\s*(?P\d+)\s*(?P\d+)\s*\|\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") - pythia_xsec_re = re.compile("Inclusive cross section\s*:\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") + pythiare = re.compile(r"Les Houches User Process\(es\)\s*\d+\s*\|\s*(?P\d+)\s*(?P\d+)\s*(?P\d+)\s*\|\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") + pythia_xsec_re = re.compile(r"Inclusive cross section\s*:\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") sigma_m, Nacc, Ntry = None, None, None for line in misc.BackRead(log_file_path): info = pythiare.search(line) @@ -5158,7 +5277,7 @@ def do_pythia(self, line): # read the line from the bottom of the file #pythia_log = misc.BackRead(pjoin(self.me_dir,'Events', self.run_name, # '%s_pythia.log' % tag)) - pythiare = re.compile("\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") + pythiare = re.compile(r"\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") for line in misc.reverse_readline(pjoin(self.me_dir,'Events', self.run_name, '%s_pythia.log' % tag)): info = pythiare.search(line) @@ -5619,8 +5738,8 @@ def launch_job(self,exe, cwd=None, stdout=None, argument = [], remaining=0, input_files.append(self.get_pdf_input_filename()) #Find the correct ajob - Gre = re.compile("\s*j=(G[\d\.\w]+)") - origre = re.compile("grid_directory=(G[\d\.\w]+)") + Gre = re.compile(r"\s*j=(G[\d\.\w]+)") + origre = re.compile(r"grid_directory=(G[\d\.\w]+)") try : fsock = open(exe) except Exception: @@ -5628,7 +5747,7 @@ def launch_job(self,exe, cwd=None, stdout=None, argument = [], remaining=0, text = fsock.read() output_files = Gre.findall(text) if not output_files: - Ire = re.compile("for i in ([\d\.\s]*) ; do") + Ire = re.compile(r"for i in ([\d\.\s]*) ; do") data = Ire.findall(text) data = ' '.join(data).split() for nb in data: @@ -6035,7 +6154,7 @@ def get_last_tag(self, level): 'syscalc':[], 'rivet':['rivet']} - if name == self.run_name: + if name and name == self.run_name: if reload_card: run_card = pjoin(self.me_dir, 'Cards','run_card.dat') self.run_card = banner_mod.RunCard(run_card) @@ -6334,7 +6453,7 @@ def run_syscalc(self, mode='parton', event_path=None, output=None): elif mode == 'Pythia': stdout = open(pjoin(event_dir, self.run_name, '%s_%s_syscalc.log' % (tag,mode)),'w') if 'mgpythiacard' in self.banner: - pat = re.compile('''^\s*qcut\s*=\s*([\+\-\d.e]*)''', re.M+re.I) + pat = re.compile(r'''^\s*qcut\s*=\s*([\+\-\d.e]*)''', re.M+re.I) data = pat.search(self.banner['mgpythiacard']) if data: qcut = float(data.group(1)) @@ -6611,7 +6730,7 @@ def get_subP_ids(path): for line in open(pjoin(path, 'leshouche.inc')): if not 'IDUP' in line: continue - particles = re.search("/([\d,-]+)/", line) + particles = re.search(r"/([\d,-]+)/", line) all_ids.append([int(p) for p in particles.group(1).split(',')]) return all_ids @@ -6899,6 +7018,7 @@ def do_combine_events(self, line): partials = 0 # if too many file make some partial unweighting sum_xsec, sum_xerru, sum_axsec = 0,[],0 + partials_info = [] Gdirs = self.get_Gdir() Gdirs.sort() for Gdir in Gdirs: @@ -6917,16 +7037,21 @@ def do_combine_events(self, line): sum_axsec += result.get('axsec')*gscalefact[Gdir] if len(AllEvent) >= 80: #perform a partial unweighting + nb_event = min(abs(1.01*self.nb_event*sum_axsec/self.results.current['cross']),self.run_card['nevents']) AllEvent.unweight(pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), - get_wgt, log_level=5, trunc_error=1e-2, event_target=self.nb_event) + 
get_wgt, log_level=5, trunc_error=1e-2, event_target=nb_event) AllEvent = lhe_parser.MultiEventFile() AllEvent.banner = self.banner - AllEvent.add(pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), + partials_info.append((pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), - sum_axsec) + sum_axsec) ) + sum_xsec, sum_xerru, sum_axsec = 0,[],0 partials +=1 + for data in partials_info: + AllEvent.add(*data) + if not hasattr(self,'proc_characteristic'): self.proc_characteristic = self.get_characteristics() diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/misc.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/misc.py index c4c669f36b..e7fd60be0d 100755 --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/misc.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/misc.py @@ -67,7 +67,7 @@ def parse_info_str(fsock): """ info_dict = {} - pattern = re.compile("(?P\w*)\s*=\s*(?P.*)", + pattern = re.compile(r"(?P\w*)\s*=\s*(?P.*)", re.IGNORECASE | re.VERBOSE) for entry in fsock: entry = entry.strip() @@ -84,7 +84,7 @@ def parse_info_str(fsock): def glob(name, path=''): """call to glob.glob with automatic security on path""" import glob as glob_module - path = re.sub('(?P\?|\*|\[|\])', '[\g]', path) + path = re.sub(r'(?P\?|\*|\[|\])', r'[\g]', path) return glob_module.glob(pjoin(path, name)) #=============================================================================== @@ -614,10 +614,10 @@ def mod_compilator(directory, new='gfortran', current=None, compiler_type='gfort #search file file_to_change=find_makefile_in_dir(directory) if compiler_type == 'gfortran': - comp_re = re.compile('^(\s*)FC\s*=\s*(.+)\s*$') + comp_re = re.compile(r'^(\s*)FC\s*=\s*(.+)\s*$') var = 'FC' elif compiler_type == 'cpp': - comp_re = re.compile('^(\s*)CXX\s*=\s*(.+)\s*$') + comp_re = re.compile(r'^(\s*)CXX\s*=\s*(.+)\s*$') var = 'CXX' else: MadGraph5Error, 'Unknown compiler type: %s' % compiler_type @@ -861,9 +861,9 @@ def detect_current_compiler(path, compiler_type='fortran'): # comp = re.compile("^\s*FC\s*=\s*(\w+)\s*") # The regular expression below allows for compiler definition with absolute path if compiler_type == 'fortran': - comp = re.compile("^\s*FC\s*=\s*([\w\/\\.\-]+)\s*") + comp = re.compile("^\\s*FC\\s*=\\s*([\\w\\/\\.\\-]+)\\s*") elif compiler_type == 'cpp': - comp = re.compile("^\s*CXX\s*=\s*([\w\/\\.\-]+)\s*") + comp = re.compile("^\\s*CXX\\s*=\\s*([\\w\\/\\.\\-]+)\\s*") else: MadGraph5Error, 'Unknown compiler type: %s' % compiler_type @@ -1001,7 +1001,19 @@ def call_stdout(arg, *args, **opt): def copytree(src, dst, symlinks = False, ignore = None): if not os.path.exists(dst): os.makedirs(dst) - shutil.copystat(src, dst) + try: + shutil.copystat(src, dst) + except PermissionError: + if os.path.realpath(src).startswith('/cvmfs') and os.path.realpath(dst).startswith('/afs'): + # allowing missmatch from cvmfs to afs since sounds to not create issue --at least in general-- + logger.critical(f'Ignoring that we could not copy permissions from {src} to {dst}') + else: + logger.critical(f'Permission error detected from {src} to {dst}.\n'+\ + 'If you are using WSL with windows partition, please try using python3.12\n'+\ + 'or avoid moving your data from the WSL partition to the UNIX one') + # we do not have enough experience in WSL to allow it to get trough. + raise + lst = os.listdir(src) if ignore: excl = ignore(src, lst) @@ -1895,12 +1907,12 @@ class EasterEgg(object): May4_banner = "* _____ *\n" + \ "* ,-~\" \"~-. *\n" + \ "* * ,^ ___ ^. 
* *\n" + \ - "* * / .^ ^. \ * *\n" + \ + "* * / .^ ^. \\ * *\n" + \ "* * Y l o ! Y * *\n" + \ "* * l_ `.___.' _,[ * *\n" + \ "* * |^~\"--------------~\"\"^| * *\n" + \ "* * ! May the 4th ! * *\n" + \ - "* * \ / * *\n" + \ + "* * \\ / * *\n" + \ "* * ^. .^ * *\n" + \ "* * \"-.._____.,-\" * *\n" @@ -1909,13 +1921,13 @@ class EasterEgg(object): "* M::::::::::M M::::::::::M *\n" + \ "* M:::::::::::M M:::::::::::M (_)___ *\n" + \ "* M:::::::M::::M M::::M:::::::M | / __| *\n" + \ - "* M::::::M M::::M M::::M M::::::M | \__ \ *\n" + \ + "* M::::::M M::::M M::::M M::::::M | \\__ \\ *\n" + \ "* M::::::M M::::M::::M M::::::M |_|___/ *\n" + \ "* M::::::M M:::::::M M::::::M *\n" + \ "* M::::::M M:::::M M::::::M / _| ___ _ __ *\n" + \ - "* M::::::M MMMMM M::::::M | |_ / _ \| '__| *\n" + \ + "* M::::::M MMMMM M::::::M | |_ / _ \\| '__| *\n" + \ "* M::::::M M::::::M | _| (_) | | *\n" + \ - "* M::::::M M::::::M |_/\/\___/|_| *\n" + \ + "* M::::::M M::::::M |_/\\/\\___/|_| *\n" + \ "* M::::::M M::::::M *\n" + \ "* MMMMMMMM MMMMMMMM *\n" + \ "* *\n" + \ @@ -2233,39 +2245,51 @@ def import_python_lhapdf(lhapdfconfig): os.environ['LD_LIBRARY_PATH'] = lhapdf_libdir else: os.environ['LD_LIBRARY_PATH'] = '%s:%s' %(lhapdf_libdir,os.environ['LD_LIBRARY_PATH']) - try: candidates=[dirname for dirname in os.listdir(lhapdf_libdir) \ if os.path.isdir(os.path.join(lhapdf_libdir,dirname))] except OSError: candidates=[] + if os.path.isdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')): + candidates += [pjoin(os.pardir,'local', 'lib', dirname) for dirname in os.listdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')) + if os.path.isdir(os.path.join(lhapdf_libdir,os.pardir, 'local', 'lib', dirname))] for candidate in candidates: - if os.path.isdir(os.path.join(lhapdf_libdir,candidate,'site-packages')): - sys.path.insert(0,os.path.join(lhapdf_libdir,candidate,'site-packages')) - try: - import lhapdf - use_lhapdf=True - break - except ImportError: - sys.path.pop(0) - continue + for subdir in ['site-packages', 'dist-packages']: + if os.path.isdir(os.path.join(lhapdf_libdir,candidate, subdir)): + sys.path.insert(0,os.path.join(lhapdf_libdir,candidate, subdir)) + try: + import lhapdf + use_lhapdf=True + break + except ImportError as error: + sys.path.pop(0) + continue + else: + continue + break if not use_lhapdf: try: candidates=[dirname for dirname in os.listdir(lhapdf_libdir+'64') \ if os.path.isdir(os.path.join(lhapdf_libdir+'64',dirname))] except OSError: candidates=[] - + if os.path.isdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib64')): + candidates += [pjoin(os.pardir,'local', 'lib64', dirname) for dirname in os.listdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')) + if os.path.isdir(os.path.join(lhapdf_libdir,os.pardir, 'local', 'lib64', dirname))] for candidate in candidates: - if os.path.isdir(os.path.join(lhapdf_libdir+'64',candidate,'site-packages')): - sys.path.insert(0,os.path.join(lhapdf_libdir+'64',candidate,'site-packages')) - try: - import lhapdf - use_lhapdf=True - break - except ImportError: - sys.path.pop(0) - continue + for subdir in ['site-packages', 'dist-packages']: + if os.path.isdir(os.path.join(lhapdf_libdir+'64',candidate, subdir)): + sys.path.insert(0,os.path.join(lhapdf_libdir+'64',candidate, subdir)) + try: + import lhapdf + use_lhapdf=True + break + except ImportError as error: + sys.path.pop(0) + continue + else: + continue + break if not use_lhapdf: try: import lhapdf diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/shower_card.py 
b/epochX/cudacpp/gg_tt01g.mad/bin/internal/shower_card.py index e87d534177..16ed72b1a0 100755 --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/shower_card.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/shower_card.py @@ -288,7 +288,7 @@ def write(self, output_file, template=None, python_template=False, self.text = open(template,'r').read() - key_re = re.compile('^(\s*)([\S^#]+)(\s*)=(\s*)([^#]*?)(\s*)(\#.*|$)') + key_re = re.compile(r'^(\s*)([\S^#]+)(\s*)=(\s*)([^#]*?)(\s*)(\#.*|$)') newlines = [] for line in self.text.split('\n'): key_match = key_re.findall(line) diff --git a/epochX/cudacpp/gg_tt01g.mad/bin/internal/systematics.py b/epochX/cudacpp/gg_tt01g.mad/bin/internal/systematics.py index 28eaed00e2..2acebd087c 100644 --- a/epochX/cudacpp/gg_tt01g.mad/bin/internal/systematics.py +++ b/epochX/cudacpp/gg_tt01g.mad/bin/internal/systematics.py @@ -582,7 +582,7 @@ def print_cross_sections(self, all_cross, nb_event, stdout): else: resume.write( '#PDF %s: %g +%2.3g%% -%2.3g%%\n' % (pdfset.name, pdferr.central,pdferr.errplus*100/all_cross[0], pdferr.errminus*100/all_cross[0])) - dyn_name = {1: '\sum ET', 2:'\sum\sqrt{m^2+pt^2}', 3:'0.5 \sum\sqrt{m^2+pt^2}',4:'\sqrt{\hat s}' } + dyn_name = {1: r'\sum ET', 2:r'\sum\sqrt{m^2+pt^2}', 3:r'0.5 \sum\sqrt{m^2+pt^2}',4:r'\sqrt{\hat s}' } for key, curr in dyns.items(): if key ==-1: continue @@ -789,7 +789,7 @@ def get_id(self): return int(self.start_wgt_id) if 'initrwgt' in self.banner: - pattern = re.compile(' set lhapdf /PATH/TO/lhapdf-config -None does not seem to correspond to a valid lhapdf-config executable. -Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). -Note that you can still compile and run aMC@NLO with the built-in PDFs - MG5_aMC> set lhapdf /PATH/TO/lhapdf-config - Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +57,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005491018295288086  +DEBUG: model prefixing takes 0.00537419319152832  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,24 +150,24 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=3: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g WEIGHTED<=3 @1 INFO: Process has 16 diagrams -1 processes with 16 diagrams generated in 0.023 s +1 processes with 16 diagrams generated in 0.022 s Total: 1 processes with 16 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT -Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.5.3_lo_vect. +Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT +DEBUG: opt['output_options']['vector_size'] =  32 [export_v4.py at line 4334]  Output will be done with PLUGIN: CUDACPP_OUTPUT -DEBUG: cformat =  standalone_simd [export_cpp.py at line 3070]  DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 165]  INFO: initialize a new directory: CODEGEN_mad_gg_ttg INFO: remove old information in CODEGEN_mad_gg_ttg DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 @@ -182,32 +177,28 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  -DEBUG: subproc_number =  0 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses/P1_gg_ttxg [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  15 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1548]  -Generated helas calls for 1 subprocesses (16 diagrams) in 0.040 s -Wrote files for 36 helas calls in 0.138 s +Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s +Wrote files for 36 helas calls in 0.122 s +DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.333 s +ALOHA: aloha creates 5 routines in 0.334 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 10 routines in 0.317 s +ALOHA: aloha creates 10 routines in 0.320 s VVV1 VVV1 FFV1 @@ -217,39 +208,41 @@ ALOHA: aloha creates 10 routines in 0.317 s VVVV1 VVVV3 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/src/. 
The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common -patching file Source/genps.inc +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file SubProcesses/makefile -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -Hunk #2 succeeded at 249 (offset 16 lines). +Hunk #2 succeeded at 243 (offset 29 lines). DEBUG: p.returncode =  0 [output.py at line 258]  -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/README +/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/README Run "open index.html" to see more information about this process. quit -real 0m3.227s -user 0m2.210s -sys 0m0.272s -Code generation completed in 3 seconds +real 0m2.478s +user 0m2.208s +sys 0m0.251s +Code generation completed in 2 seconds ************************************************************ * * * W E L C O M E to * @@ -262,7 +255,7 @@ Code generation completed in 3 seconds * * * * * * * * * * * * -* VERSION 3.5.3_lo_vect * +* VERSION 3.6.0_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * @@ -270,9 +263,9 @@ Code generation completed in 3 seconds * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt @@ -292,7 +285,7 @@ launch in debug mode * * * * * * * * * * * * -* VERSION 3.5.3_lo_vect * +* VERSION 3.6.0_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * @@ -300,9 +293,9 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt diff --git a/epochX/cudacpp/gg_ttg.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_ttg.mad/Cards/me5_configuration.txt index cdeedc7863..4f5079f78a 100644 --- a/epochX/cudacpp/gg_ttg.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_ttg.mad/Cards/me5_configuration.txt @@ -116,6 +116,7 @@ # cluster_type = condor # cluster_queue = madgraph # cluster_size = 150 +# cluster_walltime = # time in minute for slurm and second for condor (not supported for other scheduller) #! Path to a node directory to avoid direct writing on the central disk #! 
Note that condor clusters avoid direct writing by default (therefore this @@ -234,7 +235,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gg_ttg.mad/Cards/param_card.dat b/epochX/cudacpp/gg_ttg.mad/Cards/param_card.dat index caf4a67ea8..bb150ef10d 100644 --- a/epochX/cudacpp/gg_ttg.mad/Cards/param_card.dat +++ b/epochX/cudacpp/gg_ttg.mad/Cards/param_card.dat @@ -1,5 +1,5 @@ ###################################################################### -## PARAM_CARD AUTOMATICALY GENERATED BY MG5 FOLLOWING UFO MODEL #### +## PARAM_CARD AUTOMATICALLY GENERATED BY MG5 FOLLOWING UFO MODEL #### ###################################################################### ## ## ## Width set on Auto will be computed following the information ## diff --git a/epochX/cudacpp/gg_ttg.mad/Cards/param_card_default.dat b/epochX/cudacpp/gg_ttg.mad/Cards/param_card_default.dat index caf4a67ea8..bb150ef10d 100644 --- a/epochX/cudacpp/gg_ttg.mad/Cards/param_card_default.dat +++ b/epochX/cudacpp/gg_ttg.mad/Cards/param_card_default.dat @@ -1,5 +1,5 @@ ###################################################################### -## PARAM_CARD AUTOMATICALY GENERATED BY MG5 FOLLOWING UFO MODEL #### +## PARAM_CARD AUTOMATICALLY GENERATED BY MG5 FOLLOWING UFO MODEL #### ###################################################################### ## ## ## Width set on Auto will be computed following the information ## diff --git a/epochX/cudacpp/gg_ttg.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_ttg.mad/Cards/proc_card_mg5.dat index 72d7a0efd4..b78bb56b1e 100644 --- a/epochX/cudacpp/gg_ttg.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_ttg.mad/Cards/proc_card_mg5.dat @@ -8,7 +8,7 @@ #* * * * #* * #* * -#* VERSION 3.5.3_lo_vect 2023-12-23 * +#* VERSION 3.6.0_lo_vect 2024-06-17 * #* * #* WARNING: UNKNOWN DEVELOPMENT VERSION. * #* WARNING: DO NOT USE FOR PRODUCTION * diff --git a/epochX/cudacpp/gg_ttg.mad/Cards/reweight_card_default.dat b/epochX/cudacpp/gg_ttg.mad/Cards/reweight_card_default.dat index ace534ae02..5cc65fe095 100644 --- a/epochX/cudacpp/gg_ttg.mad/Cards/reweight_card_default.dat +++ b/epochX/cudacpp/gg_ttg.mad/Cards/reweight_card_default.dat @@ -19,8 +19,16 @@ change mode NLO # Define type of Reweighting. For LO sample this command # has no effect since only "LO" mode is allowed. + +#uncomment if you do not want to overwrite the reweight file of Sudakov in rw_me +#change rwgt_dir /PATH_MG5_BRANCH/NAME_FOLDER + +#uncomment if you want to use Sudakov Reweight +#change include_sudakov True + launch + # SPECIFY A PATH OR USE THE SET COMMAND LIKE THIS: # set sminputs 1 130 # modify 1/alpha_EW diff --git a/epochX/cudacpp/gg_ttg.mad/Cards/run_card.dat b/epochX/cudacpp/gg_ttg.mad/Cards/run_card.dat index c545f67660..d087670827 100644 --- a/epochX/cudacpp/gg_ttg.mad/Cards/run_card.dat +++ b/epochX/cudacpp/gg_ttg.mad/Cards/run_card.dat @@ -160,6 +160,7 @@ systematics = systematics_program ! none, systematics [python], SysCalc [depreceted, C++] ['--mur=0.5,1,2', '--muf=0.5,1,2', '--pdf=errorset'] = systematics_arguments ! 
see: https://cp3.irmp.ucl.ac.be/projects/madgraph/wiki/Systematics#Systematicspythonmodule + #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ diff --git a/epochX/cudacpp/gg_ttg.mad/Cards/run_card_default.dat b/epochX/cudacpp/gg_ttg.mad/Cards/run_card_default.dat index 06d9e5c206..7f66363ba0 100644 --- a/epochX/cudacpp/gg_ttg.mad/Cards/run_card_default.dat +++ b/epochX/cudacpp/gg_ttg.mad/Cards/run_card_default.dat @@ -160,6 +160,7 @@ systematics = systematics_program ! none, systematics [python], SysCalc [depreceted, C++] ['--mur=0.5,1,2', '--muf=0.5,1,2', '--pdf=errorset'] = systematics_arguments ! see: https://cp3.irmp.ucl.ac.be/projects/madgraph/wiki/Systematics#Systematicspythonmodule + #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ diff --git a/epochX/cudacpp/gg_ttg.mad/MGMEVersion.txt b/epochX/cudacpp/gg_ttg.mad/MGMEVersion.txt index 9d3a5c0ba0..a806d3938c 100644 --- a/epochX/cudacpp/gg_ttg.mad/MGMEVersion.txt +++ b/epochX/cudacpp/gg_ttg.mad/MGMEVersion.txt @@ -1 +1 @@ -3.5.3_lo_vect \ No newline at end of file +3.6.0_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/gg_ttg.mad/Source/DHELAS/aloha_functions.f b/epochX/cudacpp/gg_ttg.mad/Source/DHELAS/aloha_functions.f index 975725737f..47699fa614 100644 --- a/epochX/cudacpp/gg_ttg.mad/Source/DHELAS/aloha_functions.f +++ b/epochX/cudacpp/gg_ttg.mad/Source/DHELAS/aloha_functions.f @@ -351,7 +351,7 @@ subroutine oxxxxx(p,fmass,nhel,nsf , fo) fo(6) = ip * sqm(abs(im)) else - pp = min(p(0),dsqrt(p(1)**2+p(2)**2+p(3)**2)) +c pp = min(p(0),dsqrt(p(1)**2+p(2)**2+p(3)**2)) sf(1) = dble(1+nsf+(1-nsf)*nh)*rHalf sf(2) = dble(1+nsf-(1-nsf)*nh)*rHalf omega(1) = dsqrt(p(0)+pp) @@ -2054,7 +2054,7 @@ subroutine CombineAmp(nb, ihels, iwfcts, W1, Wall, Amp) enddo return end - + subroutine CombineAmpS(nb, ihels, iwfcts, W1, Wall, Amp) integer nb ! size of the vectors diff --git a/epochX/cudacpp/gg_ttg.mad/Source/MODEL/couplings.f b/epochX/cudacpp/gg_ttg.mad/Source/MODEL/couplings.f index f3b620ab58..04d6bb5333 100644 --- a/epochX/cudacpp/gg_ttg.mad/Source/MODEL/couplings.f +++ b/epochX/cudacpp/gg_ttg.mad/Source/MODEL/couplings.f @@ -10,18 +10,27 @@ SUBROUTINE COUP() PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + LOGICAL UPDATELOOP COMMON /TO_UPDATELOOP/UPDATELOOP INCLUDE 'input.inc' - INCLUDE '../vector.inc' + + INCLUDE 'coupl.inc' READLHA = .TRUE. 
INCLUDE 'intparam_definition.inc' CALL COUP1() + IF (UPDATELOOP) THEN + + CALL COUP2() + + ENDIF + C couplings needed to be evaluated points by points C - CALL COUP2(1) + CALL COUP3(1) RETURN END @@ -47,7 +56,11 @@ SUBROUTINE UPDATE_AS_PARAM(VECID) INCLUDE '../maxparticles.inc' INCLUDE '../cuts.inc' + + INCLUDE '../vector.inc' + + INCLUDE '../run.inc' DOUBLE PRECISION ALPHAS @@ -65,7 +78,7 @@ SUBROUTINE UPDATE_AS_PARAM(VECID) couplings needed to be evaluated points by points C ALL_G(VECID) = G - CALL COUP2(VECID) + CALL COUP3(VECID) RETURN END @@ -80,7 +93,9 @@ SUBROUTINE UPDATE_AS_PARAM2(MU_R2,AS2 ,VECID) INTEGER VECID INCLUDE 'model_functions.inc' INCLUDE 'input.inc' + INCLUDE '../vector.inc' + INCLUDE 'coupl.inc' DOUBLE PRECISION MODEL_SCALE COMMON /MODEL_SCALE/MODEL_SCALE diff --git a/epochX/cudacpp/gg_ttg.mad/Source/MODEL/couplings1.f b/epochX/cudacpp/gg_ttg.mad/Source/MODEL/couplings1.f index e14f3a1770..72cfa0f6e4 100644 --- a/epochX/cudacpp/gg_ttg.mad/Source/MODEL/couplings1.f +++ b/epochX/cudacpp/gg_ttg.mad/Source/MODEL/couplings1.f @@ -7,11 +7,12 @@ SUBROUTINE COUP1( ) IMPLICIT NONE INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' END diff --git a/epochX/cudacpp/gg_ttg.mad/Source/MODEL/couplings2.f b/epochX/cudacpp/gg_ttg.mad/Source/MODEL/couplings2.f index e638b28035..30f3a04e3b 100644 --- a/epochX/cudacpp/gg_ttg.mad/Source/MODEL/couplings2.f +++ b/epochX/cudacpp/gg_ttg.mad/Source/MODEL/couplings2.f @@ -2,19 +2,17 @@ c written by the UFO converter ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc - SUBROUTINE COUP2( VECID) + SUBROUTINE COUP2( ) IMPLICIT NONE - INTEGER VECID + INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' - GC_10(VECID) = -G - GC_11(VECID) = MDL_COMPLEXI*G - GC_12(VECID) = MDL_COMPLEXI*MDL_G__EXP__2 END diff --git a/epochX/cudacpp/gg_ttg.mad/Source/MODEL/couplings3.f b/epochX/cudacpp/gg_ttg.mad/Source/MODEL/couplings3.f index f537dd3764..ad696f2865 100644 --- a/epochX/cudacpp/gg_ttg.mad/Source/MODEL/couplings3.f +++ b/epochX/cudacpp/gg_ttg.mad/Source/MODEL/couplings3.f @@ -7,12 +7,13 @@ SUBROUTINE COUP3( VECID) IMPLICIT NONE INTEGER VECID INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' GC_10(VECID) = -G GC_11(VECID) = MDL_COMPLEXI*G diff --git a/epochX/cudacpp/gg_ttg.mad/Source/MODEL/makefile b/epochX/cudacpp/gg_ttg.mad/Source/MODEL/makefile index 0b24445a53..5a5681d220 100644 --- a/epochX/cudacpp/gg_ttg.mad/Source/MODEL/makefile +++ b/epochX/cudacpp/gg_ttg.mad/Source/MODEL/makefile @@ -3,7 +3,7 @@ # Makefile for model library # # ---------------------------------------------------------------------------- - +# template models/template_files/makefile_madevent # Check for ../make_opts ifeq ($(wildcard ../make_opts), ../make_opts) include ../make_opts diff --git a/epochX/cudacpp/gg_ttg.mad/Source/MODEL/makeinc.inc b/epochX/cudacpp/gg_ttg.mad/Source/MODEL/makeinc.inc index 6e2743eac1..4a9e1b2cc5 100644 --- a/epochX/cudacpp/gg_ttg.mad/Source/MODEL/makeinc.inc +++ b/epochX/cudacpp/gg_ttg.mad/Source/MODEL/makeinc.inc @@ -2,4 +2,4 @@ # written by the UFO converter 
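For orientation on the MODEL hunks above: COUP2 loses its VECID argument and becomes a one-off initialisation (called only when UPDATELOOP is set), while the new COUP3(VECID) recomputes the alpha_s-dependent couplings for one slot of the event vector, and makeinc.inc gains couplings3.o accordingly. A minimal Python sketch of this one-time/per-point split, with illustrative names only (not MG5aMC code):

import math

VECSIZE_MEMMAX = 32                    # cf. vector.inc
ALL_G = [0.0] * VECSIZE_MEMMAX         # running strong coupling g, one slot per event
GC_10 = [0j] * VECSIZE_MEMMAX          # alpha_s-dependent couplings (cf. GC_10, GC_11)
GC_11 = [0j] * VECSIZE_MEMMAX

def coup2():
    """One-time couplings, refreshed only when UPDATELOOP is set (cf. COUP2)."""
    pass  # nothing alpha_s-dependent lives here any more

def coup3(vecid, g):
    """Per-point couplings for one event slot (cf. COUP3(VECID))."""
    GC_10[vecid] = -g
    GC_11[vecid] = 1j * g              # MDL_COMPLEXI*G in the Fortran

def update_as_param(vecid, alphas):
    """Refresh g from alpha_s for one slot, then rebuild its couplings
    (cf. UPDATE_AS_PARAM now calling COUP3 instead of COUP2)."""
    g = 2.0 * math.sqrt(math.pi * alphas)  # g = sqrt(4*pi*alpha_s)
    ALL_G[vecid] = g
    coup3(vecid, g)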
############################################################################# -MODEL = couplings.o lha_read.o printout.o rw_para.o model_functions.o couplings1.o couplings2.o \ No newline at end of file +MODEL = couplings.o lha_read.o printout.o rw_para.o model_functions.o couplings1.o couplings2.o couplings3.o \ No newline at end of file diff --git a/epochX/cudacpp/gg_ttg.mad/Source/MODEL/printout.f b/epochX/cudacpp/gg_ttg.mad/Source/MODEL/printout.f index 18b8f35b08..3e195b03a2 100644 --- a/epochX/cudacpp/gg_ttg.mad/Source/MODEL/printout.f +++ b/epochX/cudacpp/gg_ttg.mad/Source/MODEL/printout.f @@ -1,3 +1,7 @@ +ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc +c written by the UFO converter +ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc + c************************************************************************ c** ** c** MadGraph/MadEvent Interface to FeynRules ** @@ -9,7 +13,7 @@ subroutine printout implicit none - include '../vector.inc' ! defines VECSIZE_MEMMAX + include '../vector.inc' ! VECSIZE_MEMMAX (needed by coupl.inc) include 'coupl.inc' ! needs VECSIZE_MEMMAX (defined in vector.inc) include 'input.inc' diff --git a/epochX/cudacpp/gg_ttg.mad/Source/PDF/eepdf.inc b/epochX/cudacpp/gg_ttg.mad/Source/PDF/eepdf.inc index a0183e49ee..d50d8c62b3 100644 --- a/epochX/cudacpp/gg_ttg.mad/Source/PDF/eepdf.inc +++ b/epochX/cudacpp/gg_ttg.mad/Source/PDF/eepdf.inc @@ -4,6 +4,9 @@ integer n_ee parameter (n_ee = 4) ! arrays to store the components before combining them + ! note: this common block is overwritten very quickly; do not + ! use it outside of auto_dsig.f double precision ee_components(n_ee) common / to_ee_components / ee_components + diff --git a/epochX/cudacpp/gg_ttg.mad/Source/PDF/pdfwrap_emela.f b/epochX/cudacpp/gg_ttg.mad/Source/PDF/pdfwrap_emela.f index bce10819d5..da1e4e2276 100644 --- a/epochX/cudacpp/gg_ttg.mad/Source/PDF/pdfwrap_emela.f +++ b/epochX/cudacpp/gg_ttg.mad/Source/PDF/pdfwrap_emela.f @@ -13,7 +13,7 @@ SUBROUTINE PDFWRAP DOUBLE PRECISION VALUE(20) REAL*8 ALPHASPDF EXTERNAL ALPHASPDF - ! PDFs with beamstrahlung use specific initialisation/evaluation +C PDFs with beamstrahlung use specific initialisation/evaluation LOGICAL HAS_BSTRAHL COMMON /TO_HAS_BS/ HAS_BSTRAHL diff --git a/epochX/cudacpp/gg_ttg.mad/Source/PDF/pdg2pdf.f b/epochX/cudacpp/gg_ttg.mad/Source/PDF/pdg2pdf.f index 46f321e66b..2e7343a69b 100644 --- a/epochX/cudacpp/gg_ttg.mad/Source/PDF/pdg2pdf.f +++ b/epochX/cudacpp/gg_ttg.mad/Source/PDF/pdg2pdf.f @@ -108,7 +108,7 @@ double precision function pdg2pdf(ih,ipdg,beamid,x,xmu) else if (abs(ipart).eq.10) then ipart = sign(1,ipart) * 15 endif - pdg2pdf = 0d0 + pdg2pdf = 0d0 if (beamid.lt.0) then ih_local = ipart @@ -122,7 +122,7 @@ double precision function pdg2pdf(ih,ipdg,beamid,x,xmu) endif do i_ee = 1, n_ee ee_components(i_ee) = compute_eepdf(x,omx_ee(iabs(beamid)),xmu,i_ee,ipart,ih_local) - enddo + enddo pdg2pdf = ee_components(1) !
temporary to test pdf load c write(*,*), x, beamid ,omx_ee(iabs(beamid)),xmu,1,ipart,ih_local,pdg2pdf return diff --git a/epochX/cudacpp/gg_ttg.mad/Source/dsample.f b/epochX/cudacpp/gg_ttg.mad/Source/dsample.f index 09d482b45a..b0bc0547ad 100644 --- a/epochX/cudacpp/gg_ttg.mad/Source/dsample.f +++ b/epochX/cudacpp/gg_ttg.mad/Source/dsample.f @@ -204,6 +204,9 @@ subroutine sample_full(ndim,ncall,itmax,itmin,dsig,ninvar,nconfigs,VECSIZE_USED) if (VECSIZE_USED.le.1) then all_fx(1) = dsig(all_p, all_wgt,0) + ivec=0 + ilock=0 + iwarp=1 else c Here "i" is the position in the full grid of the event do i=(iwarp-1)*WARP_SIZE+1, iwarp*warp_size diff --git a/epochX/cudacpp/gg_ttg.mad/Source/eepdf.inc b/epochX/cudacpp/gg_ttg.mad/Source/eepdf.inc index a0183e49ee..d50d8c62b3 100644 --- a/epochX/cudacpp/gg_ttg.mad/Source/eepdf.inc +++ b/epochX/cudacpp/gg_ttg.mad/Source/eepdf.inc @@ -4,6 +4,9 @@ integer n_ee parameter (n_ee = 4) ! arrays to store the components before combining them + ! note: this common block is overwritten very quickly; do not + ! use it outside of auto_dsig.f double precision ee_components(n_ee) common / to_ee_components / ee_components + diff --git a/epochX/cudacpp/gg_ttg.mad/Source/genps.inc b/epochX/cudacpp/gg_ttg.mad/Source/genps.inc index af7e0efbce..ef78e7e812 100644 --- a/epochX/cudacpp/gg_ttg.mad/Source/genps.inc +++ b/epochX/cudacpp/gg_ttg.mad/Source/genps.inc @@ -29,9 +29,8 @@ c parameter (max_host=99,maxplace=199,maxpoints=100,maxans=50) c************************************************************************* c Parameters for helicity sums in matrixN.f c************************************************************************* - REAL*8 LIMHEL -c PARAMETER(LIMHEL=1e-8) ! ME threshold for helicity filtering (Fortran default) - PARAMETER(LIMHEL=0) ! ME threshold for helicity filtering (force Fortran to mimic cudacpp, see #419) +c REAL*8 LIMHEL +c PARAMETER(LIMHEL=1e-8) -> now set in the run_card.dat INTEGER MAXTRIES PARAMETER(MAXTRIES=25) C To pass the helicity configuration chosen by the DiscreteSampler to diff --git a/epochX/cudacpp/gg_ttg.mad/Source/run.inc b/epochX/cudacpp/gg_ttg.mad/Source/run.inc index 5433a23583..81c8df6bf2 100644 --- a/epochX/cudacpp/gg_ttg.mad/Source/run.inc +++ b/epochX/cudacpp/gg_ttg.mad/Source/run.inc @@ -106,4 +106,7 @@ c double precision tmin_for_channel integer sde_strat !
1 means standard single diagram enhancement strategy, c 2 means approximation by the denominator of the propagator - common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat \ No newline at end of file + common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat +c + double precision limhel + common/to_limhel/limhel diff --git a/epochX/cudacpp/gg_ttg.mad/Source/run_card.inc b/epochX/cudacpp/gg_ttg.mad/Source/run_card.inc index 67af0f2051..1a1bc782bd 100644 --- a/epochX/cudacpp/gg_ttg.mad/Source/run_card.inc +++ b/epochX/cudacpp/gg_ttg.mad/Source/run_card.inc @@ -88,6 +88,8 @@ DSQRT_SHAT = 0.000000000000000D+00 + LIMHEL = 0.000000000000000D+00 + PTJ = 2.000000000000000D+01 PTB = 0.000000000000000D+00 diff --git a/epochX/cudacpp/gg_ttg.mad/Source/setrun.f b/epochX/cudacpp/gg_ttg.mad/Source/setrun.f index 9e9ef7fdbd..dc6caef748 100644 --- a/epochX/cudacpp/gg_ttg.mad/Source/setrun.f +++ b/epochX/cudacpp/gg_ttg.mad/Source/setrun.f @@ -162,9 +162,21 @@ subroutine setrun C Fill common block for Les Houches init info do i=1,2 if(lpp(i).eq.1.or.lpp(i).eq.2) then - idbmup(i)=2212 + if (nb_proton(i).eq.1.and.nb_neutron(i).eq.0) then + idbmup(i)=2212 + elseif (nb_proton(i).eq.0.and.nb_neutron(i).eq.1) then + idbmup(i)=2112 + else + idbmup(i) = 1000000000 + (nb_proton(i)+nb_neutron(i))*10 + $ + nb_proton(i)*10000 + endif elseif(lpp(i).eq.-1.or.lpp(i).eq.-2) then - idbmup(i)=-2212 + if (nb_proton(i).eq.1.and.nb_neutron(i).eq.0) then + idbmup(i)=-2212 + else + idbmup(i) = -1*(1000000000 + (nb_proton(i)+nb_neutron(i))*10 + $ + nb_proton(i)*10000) + endif elseif(lpp(i).eq.3) then idbmup(i)=11 elseif(lpp(i).eq.-3) then diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/MGVersion.txt b/epochX/cudacpp/gg_ttg.mad/SubProcesses/MGVersion.txt index 9d3a5c0ba0..a806d3938c 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/MGVersion.txt +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/MGVersion.txt @@ -1 +1 @@ -3.5.3_lo_vect \ No newline at end of file +3.6.0_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc index 6a72bd0004..c117c80635 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.h b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.h index 4cf0de0da5..1336561d98 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.h +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 
3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f index e443f9d74a..18102513a2 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1134,11 +1134,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=32) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1148,32 +1149,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=32) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=32) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f index c9392f3f6a..feae52d0d1 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -101,6 +101,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -217,7 +219,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -281,7 +283,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -320,9 +322,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -338,6 +341,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -485,11 +490,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -589,9 +589,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -720,3 +722,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/driver.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/driver.f index c45686a3b2..c2eadb2c31 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/driver.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
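The auto_dsig.f hunks above generalise the good-helicity bookkeeping from a hard-wired pair of processes to MAXSPROC, using Fortran array syntax (GOODHEL(:,:) = .FALSE., NTRY(:) = MAXTRIES + 1). A rough Python analogue of that persistence logic, assuming a simple flag-per-entry layout (the exact list-directed I/O ordering of the Fortran is not reproduced):

NCOMB = 32     # helicity combinations for this process
MAXSPROC = 2   # cf. maxamps.inc; no longer hard-coded at the call sites
MAXTRIES = 25

goodhel = [[False] * NCOMB for _ in range(MAXSPROC)]
ntry = [0] * MAXSPROC

def write_good_hel(stream):
    # dump the whole mask, one flag per (subprocess, helicity) pair
    stream.write(' '.join('T' if f else 'F' for row in goodhel for f in row) + '\n')

def read_good_hel(stream):
    flags = stream.read().split()
    for k, flag in enumerate(flags):
        goodhel[k // NCOMB][k % NCOMB] = (flag == 'T')
    # a restored mask is final: mark every subprocess as past its trial phase
    ntry[:] = [MAXTRIES + 1] * MAXSPROC

def init_good_hel():
    for row in goodhel:
        row[:] = [False] * NCOMB
    ntry[:] = [0] * MAXSPROC

Read back at the start of a run, such a mask lets READ_GOOD_HEL skip the helicity trial phase entirely, which is the point of setting NTRY past MAXTRIES.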
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f index a8c7e012bf..80c1d61cc8 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -23,6 +23,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=32) INTEGER NGRAPHS @@ -46,8 +48,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -56,26 +58,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -86,7 +85,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -96,26 +94,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,5) /-1,-1,-1, 1,-1/ DATA (NHEL(I, 2),I=1,5) /-1,-1,-1, 1, 1/ DATA (NHEL(I, 3),I=1,5) /-1,-1,-1,-1,-1/ @@ -159,8 +156,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -169,11 +165,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=6 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=6 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -183,12 +179,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) @@ -200,7 +195,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -229,35 +225,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
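The criterion at work in this SMATRIX1 hunk: a helicity I is flagged good once |TS(I)| exceeds ANS*LIMHEL/NCOMB, and LIMHEL now comes from the run card (via the new common block in run.inc) rather than a genps.inc PARAMETER; the generated run_card.inc above writes LIMHEL = 0, which keeps every helicity with a non-vanishing contribution, mimicking cudacpp. A standalone Python sketch of the filter (illustrative, not the generated Fortran):

def update_good_helicities(ts, ans, goodhel, limhel, ncomb):
    """ts: per-helicity contributions; ans: their sum; goodhel: mutable mask.
    Mirrors IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) GOODHEL(I,1)=.TRUE."""
    added = []
    for i, t in enumerate(ts):
        if not goodhel[i] and abs(t) > ans * limhel / ncomb:
            goodhel[i] = True
            added.append(i)
    return added

# limhel = 0.0 keeps every non-zero helicity; limhel = 1e-8 reproduces the
# historical Fortran default quoted in the genps.inc hunk above.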
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -329,7 +323,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cluster.f b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cluster.f index 649e46f4e9..b8995283ed 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/cluster.f +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/cluster.f @@ -552,6 +552,8 @@ logical function cluster(p, ivec) if (btest(mlevel,1)) $ write (*,*)'New event' + iwin = 0 + jwin = 0 cluster=.false. clustered=.false. do i=0,3 @@ -663,7 +665,8 @@ logical function cluster(p, ivec) c initialize graph storage igraphs(0)=0 nleft=nexternal -c cluster +c cluster + if (iwin.eq.0.or.jwin.eq.0) stop 21 do n=1,nexternal-2 c combine winner imocl(n)=imap(iwin,2)+imap(jwin,2) diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/proc_characteristics b/epochX/cudacpp/gg_ttg.mad/SubProcesses/proc_characteristics index 119c7424dc..c11ed5a292 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/proc_characteristics +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/proc_characteristics @@ -17,6 +17,8 @@ splitting_types = [] perturbation_order = [] limitations = [] + ew_sudakov = False hel_recycling = False single_color = True nlo_mixed_expansion = True + gauge = unitary diff --git a/epochX/cudacpp/gg_ttg.mad/SubProcesses/refine.sh b/epochX/cudacpp/gg_ttg.mad/SubProcesses/refine.sh index afb9b99ad1..b46170ba23 100644 --- a/epochX/cudacpp/gg_ttg.mad/SubProcesses/refine.sh +++ b/epochX/cudacpp/gg_ttg.mad/SubProcesses/refine.sh @@ -57,7 +57,11 @@ j=%(directory)s for((try=1;try<=16;try+=1)); do if [ "$keeplog" = true ] ; then + if [[ -e ../madevent ]];then ../madevent 2>&1 >> $k &1 >> $k &1 >> log.txt &1 >> log.txt &1 >> $k \w*)>') - pat_end=re.compile('\w*)>') + pat_begin=re.compile(r'<(?P\w*)>') + pat_end=re.compile(r'\w*)>') tag_to_file={'slha':'param_card.dat', 'mgruncard':'run_card.dat', @@ -319,7 +319,7 @@ def check_pid(self, pid2label): def get_lha_strategy(self): """get the lha_strategy: how the weight have to be handle by the shower""" - if not self["init"]: + if "init" not in self or not self["init"]: raise Exception("No init block define") data = self["init"].split('\n')[0].split() @@ -537,7 +537,8 @@ def charge_card(self, tag): self.param_card = param_card_reader.ParamCard(param_card) return self.param_card elif tag == 'mgruncard': - self.run_card = RunCard(self[tag], unknown_warning=False) + with misc.TMP_variable(RunCard, 'allow_scan', True): + self.run_card = RunCard(self[tag], consistency=False, unknow_warning=False) return self.run_card elif tag == 
'mg5proccard': proc_card = self[tag].split('\n') @@ -976,6 +977,8 @@ class ConfigFile(dict): """ a class for storing/dealing with input file. """ + allow_scan = False + def __init__(self, finput=None, **opt): """initialize a new instance. input can be an instance of MadLoopParam, a file, a path to a file, or simply Nothing""" @@ -993,6 +996,7 @@ def __init__(self, finput=None, **opt): # Initialize it with all the default value self.user_set = set() self.auto_set = set() + self.scan_set = {} #key -> type of list (int/float/bool/str/... for scan self.system_only = set() self.lower_to_case = {} self.list_parameter = {} #key -> type of list (int/float/bool/str/... @@ -1109,6 +1113,8 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): #1. check if the parameter is set to auto -> pass it to special if lower_name in self: targettype = type(dict.__getitem__(self, lower_name)) + if lower_name in self.scan_set: + targettype = self.scan_set[lower_name] if targettype != str and isinstance(value, str) and value.lower() == 'auto': self.auto_set.add(lower_name) if lower_name in self.user_set: @@ -1118,13 +1124,26 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): return elif lower_name in self.auto_set: self.auto_set.remove(lower_name) - + + + #1. check if the parameter is set to auto -> pass it to special + scan_targettype = None + if self.allow_scan and isinstance(value, str) and value.strip().startswith('scan'): + if lower_name in self.user_set: + self.user_set.remove(lower_name) + self.scan_set[lower_name] = type(self[lower_name]) + dict.__setitem__(self, lower_name, value) + #keep old value. + self.post_set(lower_name,value, change_userdefine, raiseerror) + return + elif lower_name in self.scan_set: + scan_targettype = self.scan_set[lower_name] + del self.scan_set[lower_name] + # 2. Find the type of the attribute that we want if lower_name in self.list_parameter: targettype = self.list_parameter[lower_name] - - if isinstance(value, str): # split for each comma/space value = value.strip() @@ -1248,8 +1267,11 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): if change_userdefine: self.user_set.add(lower_name) return self.post_set(lower_name, None, change_userdefine, raiseerror) - elif name in self: - targettype = type(self[name]) + elif name in self: + if scan_targettype: + targettype = targettype + else: + targettype = type(self[name]) else: logger.debug('Trying to add argument %s in %s. ' % (name, self.__class__.__name__) +\ 'This argument is not defined by default. 
Please consider adding it.') @@ -1262,7 +1284,7 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): if change_userdefine: self.user_set.add(lower_name) return self.post_set(lower_name, None, change_userdefine, raiseerror) - + value = self.format_variable(value, targettype, name=name) #check that the value is allowed: if lower_name in self.allowed_value and '*' not in self.allowed_value[lower_name]: @@ -1444,7 +1466,7 @@ def format_variable(value, targettype, name="unknown"): value =int(value[:-1]) * convert[value[-1]] elif '/' in value or '*' in value: try: - split = re.split('(\*|/)',value) + split = re.split(r'(\*|/)',value) v = float(split[0]) for i in range((len(split)//2)): if split[2*i+1] == '*': @@ -1482,7 +1504,7 @@ def format_variable(value, targettype, name="unknown"): value = float(value) except ValueError: try: - split = re.split('(\*|/)',value) + split = re.split(r'(\*|/)',value) v = float(split[0]) for i in range((len(split)//2)): if split[2*i+1] == '*': @@ -1491,7 +1513,10 @@ def format_variable(value, targettype, name="unknown"): v /= float(split[2*i+2]) except: v=0 - raise InvalidCmd("%s can not be mapped to a float" % value) + if "scan" in value: + raise InvalidCmd("%s is not supported here. Note that scan command can not be present simultaneously in the run_card and param_card." % value) + else: + raise InvalidCmd("%s can not be mapped to a float" % value) finally: value = v else: @@ -1737,10 +1762,12 @@ def default_setup(self): self.add_param('splitting_types',[], typelist=str) self.add_param('perturbation_order', [], typelist=str) self.add_param('limitations', [], typelist=str) + self.add_param('ew_sudakov', False) self.add_param('hel_recycling', False) self.add_param('single_color', True) self.add_param('nlo_mixed_expansion', True) - + self.add_param('gauge', 'U') + def read(self, finput): """Read the input file, this can be a path to a file, a file object, a str with the content of the file.""" @@ -3104,7 +3131,7 @@ def write(self, output_file, template=None, python_template=False, # do not write hidden parameter not hidden for this template # if python_template: - written = written.union(set(re.findall('\%\((\w*)\)s', open(template,'r').read(), re.M))) + written = written.union(set(re.findall(r'\%\((\w*)\)s', open(template,'r').read(), re.M))) to_write = to_write.union(set(self.hidden_param)) to_write = to_write.difference(written) @@ -3157,7 +3184,6 @@ def get_value_from_include(self, path, list_of_params, output_dir): if path does not exists return the current value in self for all parameter""" #WARNING DOES NOT HANDLE LIST/DICT so far - misc.sprint(output_dir, path) # handle case where file is missing if not os.path.exists(pjoin(output_dir,path)): misc.sprint("include file not existing", pjoin(output_dir,path)) @@ -3259,7 +3285,7 @@ def edit_dummy_fct_from_file(self, filelist, outdir): text = open(path,'r').read() #misc.sprint(text) f77_type = ['real*8', 'integer', 'double precision', 'logical'] - pattern = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ + pattern = re.compile(r'^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ % {'type':'|'.join(f77_type)}, re.I+re.M) for fct in pattern.findall(text): fsock = file_writers.FortranWriter(tmp,'w') @@ -3287,6 +3313,7 @@ def edit_dummy_fct_from_file(self, filelist, outdir): starttext = open(pjoin(outdir, path+'.orig')).read() fsock.remove_routine(starttext, to_mod[path][0]) for text in to_mod[path][1]: + text = self.retro_compatible_custom_fct(text) 
fsock.writelines(text) fsock.close() if not filecmp.cmp(pjoin(outdir, path), pjoin(outdir, path+'.tmp')): @@ -3303,7 +3330,33 @@ def edit_dummy_fct_from_file(self, filelist, outdir): files.mv(pjoin(outdir,path+'.orig'), pjoin(outdir, path)) + @staticmethod + def retro_compatible_custom_fct(lines, mode=None): + f77_type = ['real*8', 'integer', 'double precision', 'logical'] + function_pat = re.compile(r'^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ + % {'type':'|'.join(f77_type)}, re.I+re.M) + include_pat = re.compile(r"\s+include\s+[\'\"]([\w\./]*)") + + assert isinstance(lines, list) + sol = [] + + if mode is None or 'vector.inc' in mode: + search = True + for i,line in enumerate(lines[:]): + if search and re.search(include_pat, line): + name = re.findall(include_pat, line)[0] + misc.sprint('DETECTED INCLUDE', name) + if 'vector.inc' in name: + search = False + if 'run.inc' in name: + sol.append(" include 'vector.inc'") + search = False + sol.append(line) + if re.search(function_pat, line): + misc.sprint("DETECTED FCT") + search = True + return sol def guess_entry_fromname(self, name, value): """ @@ -3346,7 +3399,7 @@ def update_typelist(value, name, opts): #handle metadata opts = {} forced_opts = [] - for key,val in re.findall("\<(?P<name>[_\-\w]+)\=(?P<value>[^>]*)\>", str(name)): + for key,val in re.findall(r"\<(?P<name>[_\-\w]+)\=(?P<value>[^>]*)\>", str(name)): forced_opts.append(key) if val in ['True', 'False']: opts[key] = eval(val) @@ -3681,11 +3734,22 @@ def write_autodef(self, output_dir, output_file=None): out = ["%s\n" %l for l in out] fsock.writelines(out) - @staticmethod - def get_idbmup(lpp): + def get_idbmup(self, lpp, beam=1): """return the particle colliding pdg code""" if lpp in (1,2, -1,-2): - return math.copysign(2212, lpp) + target = 2212 + if 'nb_proton1' in self: + nbp = self['nb_proton%s' % beam] + nbn = self['nb_neutron%s' % beam] + if nbp == 1 and nbn ==0: + target = 2212 + elif nbp==0 and nbn ==1: + target = 2112 + else: + target = 1000000000 + target += 10 * (nbp+nbn) + target += 10000 * nbp + return math.copysign(target, lpp) elif lpp in (3,-3): return math.copysign(11, lpp) elif lpp in (4,-4): @@ -3701,8 +3765,8 @@ def get_banner_init_information(self): the first line of the <init> block of the lhe file.""" output = {} - output["idbmup1"] = self.get_idbmup(self['lpp1']) - output["idbmup2"] = self.get_idbmup(self['lpp2']) + output["idbmup1"] = self.get_idbmup(self['lpp1'], beam=1) + output["idbmup2"] = self.get_idbmup(self['lpp2'], beam=2) output["ebmup1"] = self["ebeam1"] output["ebmup2"] = self["ebeam2"] output["pdfgup1"] = 0 @@ -3959,7 +4023,8 @@ def check_validity(self, card): dict.__setitem__(card, 'pdlabel1', card['pdlabel']) dict.__setitem__(card, 'pdlabel2', card['pdlabel']) - if abs(card['lpp1']) == 1 == abs(card['lpp2']) and card['pdlabel1'] != card['pdlabel2']: + if isinstance(card['lpp1'],int) and isinstance(card['lpp2'],int) and \ + abs(card['lpp1']) == 1 == abs(card['lpp2']) and card['pdlabel1'] != card['pdlabel2']: raise InvalidRunCard("Assymetric beam pdf not supported for proton-proton collision") def status(self, card): @@ -4156,12 +4221,16 @@ def default_setup(self): self.add_param('frame_id', 6, system=True) self.add_param("event_norm", "average", allowed=['sum','average', 'unity'], include=False, sys_default='sum', hidden=True) + self.add_param("keep_log", "normal", include=False, hidden=True, + comment="none: all logs sent to /dev/null.\n minimal: keep only the survey log of the last run.\n normal: keep the survey logs of all runs.
\n debug: keep all logs (survey and refine)", + allowed=['none', 'minimal', 'normal', 'debug']) #cut self.add_param("auto_ptj_mjj", True, hidden=True) self.add_param("bwcutoff", 15.0) self.add_param("cut_decays", False, cut='d') self.add_param('dsqrt_shat',0., cut=True) self.add_param("nhel", 0, include=False) + self.add_param("limhel", 1e-8, hidden=True, comment="threshold to determine whether a helicity contributes when not doing MC over helicities.") #pt cut self.add_param("ptj", 20.0, cut='j') self.add_param("ptb", 0.0, cut='b') @@ -4842,7 +4911,7 @@ def create_default_for_process(self, proc_characteristic, history, proc_def): # here pick strategy 2 if only one QCD color flow # and for pure multi-jet case jet_id = [21] + list(range(1, self['maxjetflavor']+1)) - if proc_characteristic['single_color']: + if proc_characteristic['gauge'] != 'FD' and proc_characteristic['single_color']: self['sde_strategy'] = 2 #for pure lepton final state go back to sde_strategy=1 pure_lepton=True @@ -5741,9 +5810,10 @@ def check_validity(self): # check that ebeam is bigger than the proton mass. for i in [1,2]: + # do not check the proton mass if no proton PDF is used (or while a scan is being initialized) - if self['lpp%s' % i ] not in [1,2]: + if self['lpp%s' % i ] not in [1,2] or isinstance(self['ebeam%i' % i], str): continue - + if self['ebeam%i' % i] < 0.938: if self['ebeam%i' %i] == 0: logger.warning("At-rest proton mode set: energy beam set to 0.938 GeV") @@ -6193,3 +6263,181 @@ def log_and_update(self, banner, card, par, v): xcard = banner.charge_card(card) xcard[par[0]].param_dict[(par[1],)].value = v xcard.write(os.path.join(self.me_dir, 'Cards', '%s.dat' % card)) + + + + +class RunCardIterator(object): + """A class keeping track of the scan: flag in the run_card and + having an __iter__() function to scan over all the points of the scan. + """ + + logging = True + def __init__(self, input_path=None): + with misc.TMP_variable(RunCard, 'allow_scan', True): + self.run_card = RunCard(input_path, consistency=False) + self.run_card.allow_scan = True + + self.itertag = [] #all the current values used + self.cross = [] # keep track of all the cross-section computed + self.param_order = [] + + def __iter__(self): + """generate the next run_card (in an abstract way) related to the scan. + Technically this generates only the generator.""" + + if hasattr(self, 'iterator'): + return self.iterator + self.iterator = self.iterate() + return self.iterator + + def write(self, path): + self.run_card.write(path) + + def next(self, autostart=False): + """call the next iteration value""" + try: + iterator = self.iterator + except: + if autostart: + iterator = self.__iter__() + else: + raise + try: + out = next(iterator) + except StopIteration: + del self.iterator + raise + return out + + def iterate(self): + """create the actual generator""" + all_iterators = {} # dictionary of key -> block of objects to scan [(param, [values]), ...]
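+        # illustrative run_card syntax handled by this iterator (hypothetical entries):
+        #     scan:[10000, 20000, 30000] = nevents   -> an independent scan dimension
+        #     scan1:[0.5, 1.0] = some_param          -> entries sharing tag '1' vary together
+        # untagged scans get a unique negative key below, so each one varies independently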
+ pattern = re.compile(r'''scan\s*(?P<id>\d*)\s*:\s*(?P<value>[^#]*)''', re.I) + + # fill all_iterators with the run_card information + for name in self.run_card.scan_set: + value = self.run_card[name] + try: + key, def_list = pattern.findall(value)[0] + except Exception as error: + misc.sprint(error) + raise Exception("Failed to handle the scan tag in the run_card: please check that the syntax is valid") + if key == '': + key = -1 * len(all_iterators) + if key not in all_iterators: + all_iterators[key] = [] + try: + all_iterators[key].append( (name, eval(def_list))) + except SyntaxError as error: + raise Exception("Failed to handle your scan definition. Please check your syntax:\n entry: %s \n Error reported: %s" %(def_list, error)) + + #prepare to keep track of the parameters changed, for the report + keys = list(all_iterators.keys()) # need to fix an order for the scan + #store the type of parameter + for key in keys: + for param, values in all_iterators[key]: + self.param_order.append("run_card#%s" % (param)) + + # do the loop + lengths = [list(range(len(all_iterators[key][0][1]))) for key in keys] + from functools import reduce + total = reduce((lambda x, y: x * y),[len(x) for x in lengths]) + for i,positions in enumerate(itertools.product(*lengths)): + self.itertag = [] + if self.logging: + logger.info("Create the next run_card in the scan definition (%s/%s) " %( i+1, total), '$MG:BOLD') + for i, pos in enumerate(positions): + key = keys[i] + for param, values in all_iterators[key]: + # assign the value in the card. + self.run_card[param] = values[pos] + self.itertag.append(values[pos]) + if self.logging: + logger.info("change parameter %s to %s", \ + param, values[pos]) + + + # return the current run_card until the next iteration + yield self.run_card + + + def store_entry(self, run_name, cross, error=None, run_card_path=None): + """store the value of the cross-section""" + + if isinstance(cross, dict): + info = dict(cross) + info.update({'bench' : self.itertag, 'run_name': run_name}) + self.cross.append(info) + else: + if error is None: + self.cross.append({'bench' : self.itertag, 'run_name': run_name, 'cross(pb)':cross}) + else: + self.cross.append({'bench' : self.itertag, 'run_name': run_name, 'cross(pb)':cross, 'error(pb)':error}) + + + def write_summary(self, path, order=None, lastline=False, nbcol=20): + """write the summary table of the scan""" + + if path: + ff = open(path, 'w') + path_events = path.rsplit("/", 1)[0] + #identCard = open(pjoin(path.rsplit("/", 2)[0], "Cards", "ident_card.dat")) + #identLines = identCard.readlines() + #identCard.close() + else: + ff = StringIO.StringIO() + if order: + keys = order + else: + keys = list(self.cross[0].keys()) + if 'bench' in keys: keys.remove('bench') + if 'run_name' in keys: keys.remove('run_name') + keys.sort() + if 'cross(pb)' in keys: + keys.remove('cross(pb)') + keys.append('cross(pb)') + if 'error(pb)' in keys: + keys.remove('error(pb)') + keys.append('error(pb)') + + formatting = "#%s%s%s\n" %('%%-%is ' % (nbcol-1), ('%%-%is ' % (nbcol))* len(self.param_order), + ('%%-%is ' % (nbcol))* len(keys)) + # header + if not lastline: + ff.write(formatting % tuple(['run_name'] + self.param_order + keys)) + formatting = "%s%s%s\n" %('%%-%is ' % (nbcol), ('%%-%ie ' % (nbcol))* len(self.param_order), + ('%%-%ie ' % (nbcol))* len(keys)) + + if not lastline: + to_print = self.cross + else: + to_print = self.cross[-1:] + for info in to_print: + name = info['run_name'] + bench = info['bench'] + data = [] + for k in keys: + if k in info: + data.append(info[k]) + else: + data.append(0.)
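+                    # observables absent for a given run default to 0. so that every
+                    # row of the summary table keeps one value per column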
+ ff.write(formatting % tuple([name] + bench + data)) + ff_single = open(pjoin(path_events, name, "params.dat"), "w") + for i_bench in range(0, len(bench)): + ff_single.write(self.param_order[i_bench] + " = " + str(bench[i_bench]) +"\n") + ff_single.close() + + if not path: + return ff.getvalue() + + + def get_next_name(self, run_name): + """returns a smart name for the next run""" + + if '_' in run_name: + name, value = run_name.rsplit('_',1) + if value.isdigit(): + return '%s_%02i' % (name, float(value)+1) + # no valid '_' in the name + return '%s_scan_02' % run_name diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/check_param_card.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/check_param_card.py index 71089d7480..bc785b5de6 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/check_param_card.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/check_param_card.py @@ -649,7 +649,7 @@ def write_inc_file(self, outpath, identpath, default, need_mp=False): #check if we need to write the value of scale for some block if os.path.exists(input_inc): text = open(input_inc).read() - scales = list(set(re.findall('mdl__(\w*)__scale', text, re.I))) + scales = list(set(re.findall(r'mdl__(\w*)__scale', text, re.I))) else: scales = [] @@ -1000,10 +1000,12 @@ def iterate(self): self.param_order.append("%s#%s" % (param.lhablock, '_'.join(repr(i) for i in param.lhacode))) # do the loop lengths = [list(range(len(all_iterators[key][0][1]))) for key in keys] - for positions in itertools.product(*lengths): + from functools import reduce + total = reduce((lambda x, y: x * y),[len(x) for x in lengths]) + for i,positions in enumerate(itertools.product(*lengths)): self.itertag = [] if self.logging: - logger.info("Create the next param_card in the scan definition", '$MG:BOLD') + logger.info("Create the next param_card in the scan definition (%s/%s)" % (i+1,total), '$MG:BOLD') for i, pos in enumerate(positions): key = keys[i] for param, values in all_iterators[key]: diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/cluster.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/cluster.py index 9a893f630d..1ad860e04f 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/cluster.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/cluster.py @@ -646,7 +646,10 @@ def worker(self): if os.path.exists(exe) and not exe.startswith('/'): exe = './' + exe if isinstance(opt['stdout'],str): - opt['stdout'] = open(opt['stdout'],'w') + if opt['stdout'] == '/dev/null': + opt['stdout'] = os.open(os.devnull, os.O_RDWR) + else: + opt['stdout'] = open(opt['stdout'],'w') if opt['stderr'] == None: opt['stderr'] = subprocess.STDOUT if arg: @@ -671,11 +674,12 @@ def worker(self): self.pids.put(pid) # the function should return 0 if everything is fine # the error message otherwise - returncode = exe(*arg, **opt) - if returncode != 0: - logger.warning("fct %s does not return 0. Stopping the code in a clean way. The error was:\n%s", exe, returncode) + try: + returncode = exe(*arg, **opt) + except Exception as error: + #logger.warning("fct %s does not return 0. Stopping the code in a clean way. 
The error was:\n%s", exe, returncode) self.stoprequest.set() - self.remove("fct %s does not return 0:\n %s" % (exe, returncode)) + self.remove("fct %s raised an exception:\n %s" % (exe, error)) except Exception as error: self.fail_msg = sys.exc_info() logger.warning(str(error)) @@ -700,7 +704,7 @@ def worker(self): def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, - log=None, required_output=[], nb_submit=0): + log=None, required_output=[], nb_submit=0, python_opts={}): """submit a job on multicore machine""" # open threads if needed @@ -720,7 +724,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, return tag else: # python function - self.queue.put((tag, prog, argument, {})) + self.queue.put((tag, prog, argument, python_opts)) self.submitted.put(1) return tag @@ -908,6 +912,10 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None else: requirement = '' + if 'cluster_walltime' in self.options and self.options['cluster_walltime']\ + and self.options['cluster_walltime'] != 'None': + requirement+='\n MaxRuntime = %s' % self.options['cluster_walltime'] + if cwd is None: cwd = os.getcwd() if stdout is None: @@ -936,7 +944,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None #Submitting job(s). #Logging submit event(s). #1 job(s) submitted to cluster 2253622. - pat = re.compile("submitted to cluster (\d*)",re.MULTILINE) + pat = re.compile(r"submitted to cluster (\d*)",re.MULTILINE) output = output.decode(errors='ignore') try: id = pat.search(output).groups()[0] @@ -1025,7 +1033,7 @@ def submit2(self, prog, argument=[], cwd=None, stdout=None, stderr=None, #Logging submit event(s). #1 job(s) submitted to cluster 2253622. output = output.decode(errors='ignore') - pat = re.compile("submitted to cluster (\d*)",re.MULTILINE) + pat = re.compile(r"submitted to cluster (\d*)",re.MULTILINE) try: id = pat.search(output).groups()[0] except: @@ -1588,7 +1596,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None output = a.communicate()[0].decode(errors='ignore') #Your job 874511 ("test.sh") has been submitted - pat = re.compile("Your job (\d*) \(",re.MULTILINE) + pat = re.compile(r"Your job (\d*) \(",re.MULTILINE) try: id = pat.search(output).groups()[0] except: @@ -1606,7 +1614,7 @@ def control_one_job(self, id): if not status: return 'F' #874516 0.00000 test.sh alwall qw 03/04/2012 22:30:35 1 - pat = re.compile("^(\d+)\s+[\d\.]+\s+[\w\d\.]+\s+[\w\d\.]+\s+(\w+)\s") + pat = re.compile(r"^(\d+)\s+[\d\.]+\s+[\w\d\.]+\s+[\w\d\.]+\s+(\w+)\s") stat = '' for line in status.stdout.read().decode(errors='ignore').split('\n'): if not line: @@ -1636,7 +1644,7 @@ def control(self, me_dir=None): cmd = 'qstat -s %s' % statusflag status = misc.Popen([cmd], shell=True, stdout=subprocess.PIPE) #874516 0.00000 test.sh alwall qw 03/04/2012 22:30:35 1 - pat = re.compile("^(\d+)") + pat = re.compile(r"^(\d+)") for line in status.stdout.read().decode(errors='ignore').split('\n'): line = line.strip() try: @@ -1715,6 +1723,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None stderr = stdout if log is None: log = '/dev/null' + command = ['sbatch', '-o', stdout, '-J', me_dir, @@ -1726,6 +1735,12 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None command.insert(1, '-p') command.insert(2, self.cluster_queue) + if 'cluster_walltime' in self.options and self.options['cluster_walltime']\ + and self.options['cluster_walltime'] != 'None': + 
command.insert(1, '-t') + command.insert(2, self.options['cluster_walltime']) + + a = misc.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, @@ -1736,7 +1751,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None id = output_arr[3].rstrip() if not id.isdigit(): - id = re.findall('Submitted batch job ([\d\.]+)', ' '.join(output_arr)) + id = re.findall(r'Submitted batch job ([\d\.]+)', ' '.join(output_arr)) if not id or len(id)>1: raise ClusterManagmentError( 'fail to submit to the cluster: \n%s' \ diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/combine_runs.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/combine_runs.py index 4de6b84ec0..b1e8c88eac 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/combine_runs.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/combine_runs.py @@ -20,6 +20,7 @@ from __future__ import absolute_import import math import os +import shutil import re import logging from six.moves import range @@ -117,6 +118,7 @@ def sum_multichannel(self, channel): #Now read in all of the events and write them #back out with the appropriate scaled weight + to_clean = [] fsock = open(pjoin(channel, 'events.lhe'), 'w') wgt = results.axsec / results.nunwgt tot_nevents, nb_file = 0, 0 @@ -129,8 +131,14 @@ def sum_multichannel(self, channel): nw = self.copy_events(fsock, pjoin(path,'events.lhe'), wgt) tot_nevents += nw nb_file += 1 + to_clean.append(path) logger.debug("Combined %s file generating %s events for %s " , nb_file, tot_nevents, channel) - + for path in to_clean: + try: + shutil.rmtree(path) + except Exception as error: + pass + @staticmethod def get_fortran_str(nb): data = '%E' % nb @@ -162,6 +170,7 @@ def copy_events(self, fsock, input, new_wgt): fsock.write(line) old_line = line return nb_evt + def get_channels(self, proc_path): """Opens file symfact.dat to determine all channels""" sympath = os.path.join(proc_path, 'symfact.dat') diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/common_run_interface.py index 9bd9d9cb50..194f0cdfbd 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/common_run_interface.py @@ -23,6 +23,7 @@ import ast import logging import math +import copy import os import re import shutil @@ -181,6 +182,23 @@ def help_add_time_of_flight(self): logger.info(' threshold option allows to change the minimal value required to') logger.info(' a non zero value for the particle (default:1e-12s)') + def help_print_results(self): + logger.info("syntax: print_results [RUN_NAME] [OPTIONS]") + logger.info("-- print the results of the previous run on the screen") + logger.info(" If not RUN_NAME is provided, the information of all run") + logger.info(" are printed one after another.") + logger.info("") + logger.info(" supported options:") + logger.info(" ------------------") + logger.info(" --format=full|short # default is full") + logger.info(" full format contains banner/... 
") + logger.info(" while short is a simple multi-column format (nice for plotting)") + logger.info(" --path=") + logger.info(" allow to write the information to a file.") + logger.info(" --mode=w|a #default is w ") + logger.info(" when the information is printed to a file, you can choose ") + logger.info(" to either overwrite the file if already exists (w mode)") + logger.info(" to append the information at the end of the file (a mode)") class CheckValidForCmd(object): @@ -727,7 +745,7 @@ def __init__(self, me_dir, options, *args, **opts): if not self.proc_characteristics['ninitial']: # Get number of initial states nexternal = open(pjoin(self.me_dir,'Source','nexternal.inc')).read() - found = re.search("PARAMETER\s*\(NINCOMING=(\d)\)", nexternal) + found = re.search(r"PARAMETER\s*\(NINCOMING=(\d)\)", nexternal) self.ninitial = int(found.group(1)) else: self.ninitial = self.proc_characteristics['ninitial'] @@ -989,7 +1007,22 @@ def do_treatcards(self, line, amcatnlo=False): #raise Exception, "%s %s %s" % (sys.path, os.path.exists(pjoin(self.me_dir,'bin','internal', 'ufomodel')), os.listdir(pjoin(self.me_dir,'bin','internal', 'ufomodel'))) import ufomodel as ufomodel zero = ufomodel.parameters.ZERO - if self.proc_characteristics['nlo_mixed_expansion']: + no_width = [] + + if self.proc_characteristics['ew_sudakov']: + # if the sudakov approximation is used, force all particle widths to zero + # unless the complex mass scheme is used + if not self.proc_characteristics['complex_mass_scheme']: + no_width = [p for p in ufomodel.all_particles if p.width != zero] + logger.info('''Setting all particle widths to zero (needed for EW Sudakov approximation).''','$MG:BOLD') + # also, check that the model features the 'ntadpole' parameter, and set it to 1 + try: + param_card['tadpole'].get(1).value = 1. + logger.info('''Setting the value of ntadpole to 1 (needed for EW Sudakov approximation).''','$MG:BOLD') + except KeyError: + logger.warning('''The model has no 'ntadpole' parameter. 
The Sudakov approximation for EW corrections may give wrong results.''') + + elif self.proc_characteristics['nlo_mixed_expansion']: no_width = [p for p in ufomodel.all_particles if (str(p.pdg_code) in pids or str(-p.pdg_code) in pids) and p.width != zero] @@ -1168,17 +1201,17 @@ def detect_card_type(path): 'Begin Minpts', 'gridpack', 'ebeam1', - 'block\s+mw_run', + r'block\s+mw_run', 'BLOCK', 'DECAY', 'launch', 'madspin', - 'transfer_card\.dat', + r'transfer_card\.dat', 'set', 'main:numberofevents', # pythia8, '@MG5aMC skip_analysis', #MA5 --both-- - '@MG5aMC\s*inputs\s*=\s*\*\.(?:hepmc|lhe)', #MA5 --both-- - '@MG5aMC\s*reconstruction_name', # MA5 hadronique + r'@MG5aMC\s*inputs\s*=\s*\*\.(?:hepmc|lhe)', #MA5 --both-- + r'@MG5aMC\s*reconstruction_name', # MA5 hadronique '@MG5aMC', # MA5 hadronique 'run_rivet_later', # Rivet ] @@ -1237,7 +1270,7 @@ def detect_card_type(path): return 'madspin_card.dat' if 'decay' in text: # need to check if this a line like "decay w+" or "set decay" - if re.search("(^|;)\s*decay", fulltext, re.M): + if re.search(r"(^|;)\s*decay", fulltext, re.M): return 'madspin_card.dat' else: return 'reweight_card.dat' @@ -2074,6 +2107,12 @@ def check_multicore(self): # ensure that the run_card is present if not hasattr(self, 'run_card'): self.run_card = banner_mod.RunCard(pjoin(self.me_dir, 'Cards', 'run_card.dat')) + # Below reads the run_card in the LHE file rather than the Cards/run_card + import madgraph.various.lhe_parser as lhe_parser + args_path = list(args) + self.check_decay_events(args_path) + self.run_card = banner_mod.RunCard(lhe_parser.EventFile(args_path[0]).get_banner()['mgruncard']) + # we want to run this in a separate shell to avoid hard f2py crash command = [sys.executable] @@ -2085,6 +2124,12 @@ def check_multicore(self): command.append('--web') command.append('reweight') + ## TV: copy the event file as backup before starting reweighting + event_path = pjoin(self.me_dir, 'Events', self.run_name, 'events.lhe.gz') + event_path_backup = pjoin(self.me_dir, 'Events', self.run_name, 'events_orig.lhe.gz') + if os.path.exists(event_path) and not os.path.exists(event_path_backup): + shutil.copyfile(event_path, event_path_backup) + ######### START SINGLE CORE MODE ############ if self.options['nb_core']==1 or self.run_card['nevents'] < 101 or not check_multicore(self): if self.run_name: @@ -2200,7 +2245,7 @@ def check_multicore(self): cross_sections[key] = value / (nb_event+1) lhe.remove() for key in cross_sections: - if key == 'orig' or key.isdigit(): + if key == 'orig' or (key.isdigit() and not (key[0] == '2')): continue logger.info('%s : %s pb' % (key, cross_sections[key])) return @@ -2461,7 +2506,7 @@ def do_add_time_of_flight(self, line): ############################################################################ def do_print_results(self, line): - """Not in help:Print the cross-section/ number of events for a given run""" + """Print the cross-section/ number of events for a given run""" args = self.split_arg(line) options={'path':None, 'mode':'w', 'format':'full'} @@ -2942,10 +2987,15 @@ def do_rivet(self, line, postprocess=False): #2 Prepare Rivet setup environments rivet_path = self.options['rivet_path'] yoda_path = self.options['yoda_path'] + fastjet_path = subprocess.Popen([self.options['fastjet'], '--prefix'], + stdout = subprocess.PIPE).stdout.read().decode(errors='ignore').strip() + set_env = set_env + "export PATH={0}:$PATH\n".format(pjoin(rivet_path, 'bin')) set_env = set_env + "export PATH={0}:$PATH\n".format(pjoin(yoda_path, 'bin')) set_env = 
set_env + "export LD_LIBRARY_PATH={0}:{1}:$LD_LIBRARY_PATH\n".format(pjoin(rivet_path, 'lib'), pjoin(rivet_path, 'lib64')) set_env = set_env + "export LD_LIBRARY_PATH={0}:{1}:$LD_LIBRARY_PATH\n".format(pjoin(yoda_path, 'lib'), pjoin(yoda_path, 'lib64')) + set_env = set_env + "export LD_LIBRARY_PATH={0}:$LD_LIBRARY_PATH\n".format(pjoin(self.options['hepmc_path'], 'lib')) + set_env = set_env + "export LD_LIBRARY_PATH={0}:$LD_LIBRARY_PATH\n".format(pjoin(fastjet_path, 'lib')) major, minor = sys.version_info[0:2] set_env = set_env + "export PYTHONPATH={0}:{1}:$PYTHONPATH\n".format(pjoin(rivet_path, 'lib', 'python%s.%s' %(major,minor), 'site-packages'),\ pjoin(rivet_path, 'lib64', 'python%s.%s' %(major,minor), 'site-packages')) @@ -4311,8 +4361,8 @@ def complete_compute_widths(self, text, line, begidx, endidx, formatting=True): else: completion = {} completion['options'] = self.list_completion(text, - ['--path=', '--output=', '--min_br=0.\$', '--nlo', - '--precision_channel=0.\$', '--body_decay=']) + ['--path=', '--output=', r'--min_br=0.\$', '--nlo', + r'--precision_channel=0.\$', '--body_decay=']) return self.deal_multiple_categories(completion, formatting) @@ -4821,9 +4871,9 @@ class AskforEditCard(cmd.OneLinePathCompletion): (return False to repeat the question) """ - all_card_name = ['param_card', 'run_card', 'pythia_card', 'pythia8_card', + all_card_name = ['param_card', 'run_card', 'pythia_card', 'pythia8_card', 'fo_analysis_card' 'madweight_card', 'MadLoopParams', 'shower_card', 'rivet_card'] - to_init_card = ['param', 'run', 'madweight', 'madloop', + to_init_card = ['param', 'run', 'madweight', 'madloop', 'fo_analysis', 'shower', 'pythia8','delphes','madspin', 'rivet'] special_shortcut = {} special_shortcut_help = {} @@ -4853,6 +4903,7 @@ def load_default(self): self.has_PY8 = False self.has_delphes = False self.has_rivet = False + self.has_fo_card = False self.paths = {} self.update_block = [] @@ -5058,7 +5109,8 @@ def init_run(self, cards): try: - self.run_card = banner_mod.RunCard(self.paths['run'], consistency='warning') + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + self.run_card = banner_mod.RunCard(self.paths['run'], consistency='warning') except IOError: self.run_card = {} try: @@ -5248,6 +5300,15 @@ def init_delphes(self, cards): self.has_delphes = True return [] + def init_fo_analysis(self, cards): + self.has_fo_card = False + if not self.get_path('FO_analyse', cards): + return [] + self.has_fo_card = True + self.fo_card = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse']) + self.fo_card_def = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse_default']) + return list(self.fo_card.string_vars) + def set_CM_velocity(self, line): """compute sqrts from the velocity in the center of mass frame""" @@ -5508,6 +5569,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed['delphes_card'] = '' if self.has_rivet: allowed['rivet_card'] = '' + if self.has_fo_card: + allowed['fo_card'] = '' elif len(args) == 2: if args[1] == 'run_card': @@ -5532,6 +5595,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed = {'delphes_card':'default'} elif args[1] == 'rivet_card': allowed = {'rivet_card':'default'} + elif args[1] == 'fo_card': + allowed = {'fo_card':'default'} else: allowed = {'value':''} @@ -5539,6 +5604,7 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): start = 1 if args[1] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', 
'MadLoop_card','pythia8_card','delphes_card','plot_card', + 'fo_card', 'madanalysis5_parton_card','madanalysis5_hadron_card', 'rivet_card']: start = 2 @@ -5576,6 +5642,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): categories.append('delphes_card') if self.has_rivet: categories.append('rivet_card') + if self.has_fo_card: + categories.append('fo_card') possibilities['category of parameter (optional)'] = \ self.list_completion(text, categories) @@ -5630,7 +5698,13 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): if 'delphes_card' in allowed: if allowed['delphes_card'] == 'default': opts = ['default', 'atlas', 'cms'] - possibilities['Delphes Card'] = self.list_completion(text, opts) + possibilities['Delphes Card'] = self.list_completion(text, opts) + + if 'fo_card' in allowed: + opts = self.fo_card.string_vars + if allowed['fo_card'] == 'default': + opts.append('default') + possibilities['FO Card'] = self.list_completion(text, opts) if 'value' in list(allowed.keys()): opts = ['default', 'scale'] @@ -5733,21 +5807,28 @@ def do_set(self, line): if args[0] in self.special_shortcut: targettypes , cmd = self.special_shortcut[args[0]] if len(args) != len(targettypes) +1: - logger.warning('shortcut %s requires %s argument' % (args[0], len(targettypes))) - if len(args) < len(targettypes) +1: - return + if len(targettypes) == 1 and args[len(targettypes)].startswith('scan'): + args = args[:len(targettypes)] + [' '.join(args[len(targettypes):])] + targettypes = [str] else: - logger.warning('additional argument will be ignored') + logger.warning('shortcut %s requires %s argument' % (args[0], len(targettypes))) + if len(args) < len(targettypes) +1: + return + else: + logger.warning('additional argument will be ignored') values ={} for i, argtype in enumerate(targettypes): try: - values = {str(i): banner_mod.ConfigFile.format_variable(args[i+1], argtype, args[0])} + values[str(i)] = banner_mod.ConfigFile.format_variable(args[i+1], argtype, args[0]) except ValueError as e: logger.warning("Wrong argument: The entry #%s should be of type %s.", i+1, argtype) return except InvalidCmd as e: - logger.warning(str(e)) - return + if isinstance(args[i+1], str) and args[i+1].startswith('scan'): + values[str(i)] = args[i+1] + else: + logger.warning(str(e)) + return #else: # logger.warning("too many argument for this command") # return @@ -5787,7 +5868,7 @@ def do_set(self, line): if os.path.exists(pythia_path): logger.info('add line QCUT = %s in pythia_card.dat' % args[1]) p_card = open(pythia_path,'r').read() - p_card, n = re.subn('''^\s*QCUT\s*=\s*[\de\+\-\.]*\s*$''', + p_card, n = re.subn(r'''^\s*QCUT\s*=\s*[\de\+\-\.]*\s*$''', ''' QCUT = %s ''' % args[1], \ p_card, flags=(re.M+re.I)) if n==0: @@ -5801,7 +5882,7 @@ def do_set(self, line): if os.path.exists(pythia_path): logger.info('add line SHOWERKT = %s in pythia_card.dat' % args[1].upper()) p_card = open(pythia_path,'r').read() - p_card, n = re.subn('''^\s*SHOWERKT\s*=\s*[default\de\+\-\.]*\s*$''', + p_card, n = re.subn(r'''^\s*SHOWERKT\s*=\s*[default\de\+\-\.]*\s*$''', ''' SHOWERKT = %s ''' % args[1].upper(), \ p_card, flags=(re.M+re.I)) if n==0: @@ -5856,7 +5937,7 @@ def do_set(self, line): pjoin(self.me_dir,'Cards', 'delphes_card.dat')) return - if args[0] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', + if args[0] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', 'fo_card', 'delphes_card','madanalysis5_hadron_card','madanalysis5_parton_card','rivet_card']: if args[1] == 'default': 
@@ -6176,6 +6257,22 @@ def do_set(self, line): self.setRivet(args[start], value, default=default) self.rivet_card.write(self.paths['rivet'], self.paths['rivet_default']) + elif self.has_fo_card and (card in ['', 'fo_card'])\ + and args[start].lower() in [k.lower() for k in self.fo_card.string_vars]: + + if args[start] in self.conflict and card == '': + text = 'ambiguous name (present in more than one card). Please specify which card to edit' + logger.warning(text) + return + if args[start+1] == 'default': + value = self.fo_card_def[args[start]] + default = True + else: + value = args[start+1] + default = False + self.fo_card[args[start]] = value + self.modified_card.add('fo_card') + #INVALID -------------------------------------------------------------- else: logger.warning('invalid set command %s ' % line) @@ -6222,12 +6319,13 @@ def setM(self, block, name, value): def setR(self, name, value): - if self.mother_interface.inputfile: - self.run_card.set(name, value, user=True, raiseerror=True) - else: - self.run_card.set(name, value, user=True) - new_value = self.run_card.get(name) - logger.info('modify parameter %s of the run_card.dat to %s' % (name, new_value),'$MG:BOLD') + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + if self.mother_interface.inputfile: + self.run_card.set(name, value, user=True, raiseerror=True) + else: + self.run_card.set(name, value, user=True) + new_value = self.run_card.get(name) + logger.info('modify parameter %s of the run_card.dat to %s' % (name, new_value),'$MG:BOLD') def setML(self, name, value, default=False): @@ -6314,6 +6412,7 @@ def check_card_consistency(self): proc_charac = self.mother_interface.proc_characteristics if proc_charac['grouped_matrix'] and \ + isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int) and \ abs(self.run_card['lpp1']) == 1 == abs(self.run_card['lpp2']) and \ (self.run_card['nb_proton1'] != self.run_card['nb_proton2'] or self.run_card['nb_neutron1'] != self.run_card['nb_neutron2'] or @@ -6403,41 +6502,42 @@ def check_card_consistency(self): # check that only quark/gluon/photon are in initial beam if lpp=+-1 pdg_in_p = list(range(-6,7))+[21,22] - if (abs(self.run_card['lpp1'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial1'])) \ + if isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int): + if(abs(self.run_card['lpp1'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial1'])) \ or (abs(self.run_card['lpp2'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial2'])): - if 'pythia_card.dat' in self.cards or 'pythia8_card.dat' in self.cards: - path_to_remove = None - if 'pythia_card.dat' in self.cards: - path_to_remove = self.paths['pythia'] - card_to_remove = 'pythia_card.dat' - elif 'pythia8_card.dat' in self.cards: - path_to_remove = self.paths['pythia8'] - card_to_remove = 'pythia8_card.dat' - if path_to_remove: - if 'partonshower' in self.run_card['bypass_check']: + if 'pythia_card.dat' in self.cards or 'pythia8_card.dat' in self.cards: + path_to_remove = None + if 'pythia_card.dat' in self.cards: + path_to_remove = self.paths['pythia'] + card_to_remove = 'pythia_card.dat' + elif 'pythia8_card.dat' in self.cards: + path_to_remove = self.paths['pythia8'] + card_to_remove = 'pythia8_card.dat' + if path_to_remove: + if 'partonshower' in self.run_card['bypass_check']: + logger.warning("forcing to keep parton-shower run while possibly not fully consistent... 
please be careful") + else: + logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.') + os.remove(path_to_remove) + self.cards.remove(card_to_remove) + else: + logger.info('Remember that Parton-Shower are not yet ready for such proton component definition (HW implementation in progress).', '$MG:BOLD' ) + elif (abs(self.run_card['lpp1'])==3 and abs(self.run_card['lpp2'])==3): + if 'pythia8_card.dat' in self.cards: + if self.run_card['pdlabel'] == 'isronlyll': + if 'partonshower' not in self.run_card['bypass_check']: + # force that QED shower is on? + for param in ['TimeShower:QEDshowerByQ', 'TimeShower:QEDshowerByL', 'TimeShower:QEDshowerByGamma', 'SpaceShower:QEDshowerByQ', 'SpaceShower:QEDshowerByL']: + if param not in self.PY8Card or \ + (not self.PY8Card[param] and param.lower() not in self.PY8Card.user_set): + logger.warning('Activating QED shower: setting %s to True', param) + self.PY8Card[param] = True + elif 'partonshower' in self.run_card['bypass_check']: logger.warning("forcing to keep parton-shower run while possibly not fully consistent... please be careful") - else: + else: logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.') - os.remove(path_to_remove) - self.cards.remove(card_to_remove) - else: - logger.info('Remember that Parton-Shower are not yet ready for such proton component definition (HW implementation in progress).', '$MG:BOLD' ) - elif (abs(self.run_card['lpp1'])==3 and abs(self.run_card['lpp2'])==3): - if 'pythia8_card.dat' in self.cards: - if self.run_card['pdlabel'] == 'isronlyll': - if 'partonshower' not in self.run_card['bypass_check']: - # force that QED shower is on? - for param in ['TimeShower:QEDshowerByQ', 'TimeShower:QEDshowerByL', 'TimeShower:QEDshowerByGamma', 'SpaceShower:QEDshowerByQ', 'SpaceShower:QEDshowerByL']: - if param not in self.PY8Card or \ - (not self.PY8Card[param] and param.lower() not in self.PY8Card.user_set): - logger.warning('Activating QED shower: setting %s to True', param) - self.PY8Card[param] = True - elif 'partonshower' in self.run_card['bypass_check']: - logger.warning("forcing to keep parton-shower run while possibly not fully consistent... please be carefull") - else: - logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.') - os.remove(self.paths['pythia8']) - self.cards.remove('pythia8_card.dat') + os.remove(self.paths['pythia8']) + self.cards.remove('pythia8_card.dat') ######################################################################## @@ -6514,7 +6614,8 @@ def check_card_consistency(self): #check relation between lepton PDF // dressed lepton collisions // ... 
- if abs(self.run_card['lpp1']) != 1 or abs(self.run_card['lpp2']) != 1: + if isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int) and \ + (abs(self.run_card['lpp1']) != 1 or abs(self.run_card['lpp2']) != 1): if abs(self.run_card['lpp1']) == abs(self.run_card['lpp2']) == 3: # this can be dressed lepton or photon-flux if proc_charac['pdg_initial1'] in [[11],[-11]] and proc_charac['pdg_initial2'] in [[11],[-11]]: @@ -6732,7 +6833,11 @@ def write_card_param(self): """ write the param_card """ self.param_card.write(self.paths['param']) - + + def write_card_fo_card(self): + """ write the fo_card""" + self.fo_card.write_card_from_template(self.paths['FO_analyse'], self.paths['FO_analyse_default']) + @staticmethod def update_dependent(mecmd, me_dir, param_card, path ,timer=0, run_card=None, lhapdfconfig=None): @@ -7076,7 +7181,7 @@ def do_decay(self, line): #first find the particle particle = line.split('>')[0].strip() logger.info("change madspin_card to define the decay of %s: %s" %(particle, line.strip()), '$MG:BOLD') - particle = particle.replace('+','\+').replace('-','\-') + particle = particle.replace('+',r'\+').replace('-',r'\-') decay_pattern = re.compile(r"^\s*decay\s+%s\s*>[\s\w+-~]*?$" % particle, re.I+re.M) text= open(path).read() text = decay_pattern.sub('', text) @@ -7193,7 +7298,7 @@ def help_edit(self, prefix=True): logger.info( ' --clean remove all previously existing line in the file') logger.info( ' --comment_line="" comment all lines matching the regular expression') logger.info('') - logger.info(' Note: all regular-expression will be prefixed by ^\s*') + logger.info(r' Note: all regular-expressions will be prefixed by ^\s*') logger.info('') logger.info( ' example: edit reweight --after_line="change mode\b" change model heft') logger.info( ' edit madspin --after_line="banner" change model XXXX') @@ -7314,7 +7419,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern=r'''replace_line=(?P<quote>["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[14:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[14:-1] for posline,l in enumerate(split): if re.search(pattern, l): break @@ -7344,7 +7449,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern=r'''comment_line=(?P<quote>["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[14:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[14:-1] nb_mod = 0 for posline,l in enumerate(split): if re.search(pattern, l): @@ -7366,7 +7471,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern=r'''before_line=(?P<quote>["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[13:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[13:-1] for posline,l in enumerate(split): if re.search(pattern, l): break @@ -7383,7 +7488,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern = r'''after_line=(?P<quote>["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[12:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[12:-1] for posline,l in enumerate(split): if re.search(pattern, l): break @@ -7527,16 +7632,19 @@ def open_file(self, answer): answer = 'plot' else: answer = self.cards[int(answer)-self.integer_bias] - + path = '' if 'madweight' in answer: answer = answer.replace('madweight', 'MadWeight') elif 
'MadLoopParams' in answer: answer = self.paths['ML'] elif 'pythia8_card' in answer: answer = self.paths['pythia8'] + elif 'FO_analyse' in answer: + path = self.paths['FO_analyse'] + answer = 'fo_card' if os.path.exists(answer): path = answer - else: + elif not os.path.exists(path): if not '.dat' in answer and not '.lhco' in answer: if answer != 'trigger': path = self.paths[answer] @@ -7595,7 +7703,8 @@ def reload_card(self, path): logger.error('Please re-open the file and fix the problem.') logger.warning('using the \'set\' command without opening the file will discard all your manual change') elif path == self.paths['run']: - self.run_card = banner_mod.RunCard(path) + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + self.run_card = banner_mod.RunCard(path) elif path == self.paths['shower']: self.shower_card = shower_card_mod.ShowerCard(path) elif path == self.paths['ML']: @@ -7614,6 +7723,8 @@ def reload_card(self, path): except: import internal.madweight.Cards as mwcards self.mw_card = mwcards.Card(path) + elif path == self.paths['FO_analyse']: + self.fo_card = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse']) else: logger.debug('not keep in sync: %s', path) return path @@ -7629,6 +7740,9 @@ def scanparamcardhandling(input_path=lambda obj: pjoin(obj.me_dir, 'Cards', 'param_card.dat'), iteratorclass=param_card_mod.ParamCardIterator, summaryorder=lambda obj: lambda:None, check_card=lambda obj: CommonRunCmd.static_check_param_card, + run_card_scan=False, + run_card_input= lambda obj: pjoin(obj.me_dir, 'Cards', 'run_card.dat'), + run_card_iteratorclass=banner_mod.RunCardIterator, ): """ This is a decorator for customizing/using scan over the param_card (or technically other) This should be use like this: @@ -7678,7 +7792,60 @@ def __enter__(self): def __exit__(self, ctype, value, traceback ): self.iterator.write(self.path) - def decorator(original_fct): + def scan_over_run_card(original_fct, obj, *args, **opts): + + if isinstance(input_path, str): + card_path = run_card_input + else: + card_path = run_card_input(obj) + + run_card_iterator = run_card_iteratorclass(card_path) + orig_card = copy.deepcopy(run_card_iterator.run_card) + if not run_card_iterator.run_card.scan_set: + return original_fct(obj, *args, **opts) + + + with restore_iterator(orig_card, card_path): + # this with statement ensures that the original card is restored + # whatever happens inside this block + + if not hasattr(obj, 'allow_notification_center'): + obj.allow_notification_center = False + with misc.TMP_variable(obj, 'allow_notification_center', False): + orig_name = get_run_name(obj) + if not orig_name and args[1]: + orig_name = args[1][0] + args = (args[0], args[1][1:]) + #orig_name = "scan_%s" % len(obj.results) + + try: + os.mkdir(pjoin(obj.me_dir, 'Events', orig_name)) + except Exception: + pass + next_name = orig_name + "_00" + + for i,card in enumerate(run_card_iterator): + card.write(card_path) + # still have to check for the auto-width + #if i !=0: + next_name = run_card_iterator.get_next_name(next_name) + set_run_name(obj)(next_name) + try: + original_fct(obj, *args, **opts) + except ignoreerror as error: + run_card_iterator.store_entry(next_name, {'exception': error}) + else: + run_card_iterator.store_entry(next_name, store_for_scan(obj)(), run_card_path=card_path) + + #param_card_iterator.write(card_path) #-> this is done by the with statement + name = misc.get_scan_name(orig_name, next_name) + path = result_path(obj) % name + logger.info("write scan results in %s" % path ,'$MG:BOLD') + order = 
summaryorder(obj)() + run_card_iterator.write_summary(path, order=order) + + + def decorator(original_fct): def new_fct(obj, *args, **opts): if isinstance(input_path, str): @@ -7702,8 +7869,13 @@ def new_fct(obj, *args, **opts): if not param_card_iterator: #first run of the function - original_fct(obj, *args, **opts) - return + if run_card_scan: + scan_over_run_card(original_fct, obj, *args, **opts) + return + else: + #first run of the function + original_fct(obj, *args, **opts) + return with restore_iterator(param_card_iterator, card_path): # this with statement ensure that the original card is restore diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/extended_cmd.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/extended_cmd.py index 2f37070580..789976beee 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/extended_cmd.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/extended_cmd.py @@ -624,12 +624,12 @@ def complete(self, text, state): compfunc = self.completenames # correct wrong splittion with '\ ' - if line and begidx > 2 and line[begidx-2:begidx] == '\ ': + if line and begidx > 2 and line[begidx-2:begidx] == r'\ ': Ntext = line.split(os.path.sep)[-1] - self.completion_prefix = Ntext.rsplit('\ ', 1)[0] + '\ ' + self.completion_prefix = Ntext.rsplit(r'\ ', 1)[0] + r'\ ' to_rm = len(self.completion_prefix) - 1 Nbegidx = len(line.rsplit(os.path.sep, 1)[0]) + 1 - data = compfunc(Ntext.replace('\ ', ' '), line, Nbegidx, endidx) + data = compfunc(Ntext.replace(r'\ ', ' '), line, Nbegidx, endidx) self.completion_matches = [p[to_rm:] for p in data if len(p)>to_rm] # correct wrong splitting with '-'/"=" @@ -742,7 +742,7 @@ def path_completion(text, base_dir = None, only_dirs = False, completion += [prefix + f for f in ['.'+os.path.sep, '..'+os.path.sep] if \ f.startswith(text) and not prefix.startswith('.')] - completion = [a.replace(' ','\ ') for a in completion] + completion = [a.replace(' ',r'\ ') for a in completion] return completion @@ -1253,7 +1253,7 @@ def check_answer_in_input_file(self, question_instance, default, path=False, lin return possibility[0] if '=' in line and ' ' in line.strip(): leninit = len(line) - line,n = re.subn('\s*=\s*','=', line) + line,n = re.subn(r'\s*=\s*','=', line) if n and len(line) != leninit: return self.check_answer_in_input_file(question_instance, default, path=path, line=line) @@ -1311,7 +1311,7 @@ def nice_error_handling(self, error, line): if os.path.exists(self.debug_output): os.remove(self.debug_output) try: - super(Cmd,self).onecmd('history %s' % self.debug_output.replace(' ', '\ ')) + super(Cmd,self).onecmd('history %s' % self.debug_output.replace(' ', r'\ ')) except Exception as error: logger.error(error) @@ -2001,6 +2001,7 @@ def write_configuration(self, filepath, basefile, basedir, to_keep): text = "" has_mg5_path = False # Use local configuration => Need to update the path + already_written = set() for line in open(basefile): if '=' in line: data, value = line.split('=',1) @@ -2018,9 +2019,12 @@ def write_configuration(self, filepath, basefile, basedir, to_keep): comment = '' if key in to_keep: value = str(to_keep[key]) - else: + elif line not in already_written: + already_written.add(line) text += line continue + else: + continue if key == 'mg5_path': has_mg5_path = True try: @@ -2032,14 +2036,20 @@ def write_configuration(self, filepath, basefile, basedir, to_keep): # check if absolute path if not os.path.isabs(value): value = os.path.realpath(os.path.join(basedir, value)) - text += '%s = %s # %s \n' % (key, value, comment) + new_line = '%s = 
%s # %s \n' % (key, value, comment) + if new_line not in already_written: + text += new_line + already_written.add(new_line) for key in to_write: if key in to_keep: - text += '%s = %s \n' % (key, to_keep[key]) + new_line = '%s = %s \n' % (key, to_keep[key]) + if new_line not in already_written: + text += new_line if not MADEVENT and not has_mg5_path: - text += """\n# MG5 MAIN DIRECTORY\n""" - text += "mg5_path = %s\n" % MG5DIR + if "mg5_path = %s\n" % MG5DIR not in already_written: + text += """\n# MG5 MAIN DIRECTORY\n""" + text += "mg5_path = %s\n" % MG5DIR writer = open(filepath,'w') writer.write(text) @@ -2190,7 +2200,7 @@ def onecmd(self, line, **opt): raise def reask(self, reprint_opt=True): - pat = re.compile('\[(\d*)s to answer\]') + pat = re.compile(r'\[(\d*)s to answer\]') prev_timer = signal.alarm(0) # avoid timer if any if prev_timer: @@ -2991,7 +3001,7 @@ def question_formatting(self, nb_col = 80, lpotential_switch=0, lnb_key=0, key=None): - """should return four lines: + r"""should return four lines: 1. The upper band (typically /========\ 2. The lower band (typically \========/ 3. The line without conflict | %(nb)2d. %(descrip)-20s %(name)5s = %(switch)-10s | @@ -3239,13 +3249,13 @@ def create_question(self, help_text=True): data_to_format['conflict_switch'] = self.color_for_value(key,self.inconsistent_keys[key], consistency=False) if hidden_line: - f2 = re.sub('%(\((?:name|descrip|add_info)\)-?)(\d+)s', + f2 = re.sub(r'%(\((?:name|descrip|add_info)\)-?)(\d+)s', lambda x: '%%%s%ds' % (x.group(1),int(x.group(2))+9), f2) text.append(f2 % data_to_format) elif hidden_line: if not f3: - f3 = re.sub('%(\((?:name|descrip|add_info)\)-?)(\d+)s', + f3 = re.sub(r'%(\((?:name|descrip|add_info)\)-?)(\d+)s', lambda x: '%%%s%ds' % (x.group(1),int(x.group(2))+9), f1) text.append(f3 % data_to_format) diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/file_writers.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/file_writers.py index 41bff05276..526756129f 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/file_writers.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/file_writers.py @@ -36,10 +36,10 @@ class FileWriter(io.FileIO): supported_preprocessor_commands = ['if'] preprocessor_command_re=re.compile( - "\s*(?P<command>%s)\s*\(\s*(?P<body>.*)\s*\)\s*{\s*"\ %('|'.join(supported_preprocessor_commands))) + r"\s*(?P<command>%s)\s*\(\s*(?P<body>.*)\s*\)\s*{\s*"\ %('|'.join(supported_preprocessor_commands))) preprocessor_endif_re=re.compile(\ - "\s*}\s*(?P<else>else)?\s*(\((?P<body>.*)\))?\s*(?P<new_block>{)?\s*") + r"\s*}\s*(?P<else>else)?\s*(\((?P<body>.*)\))?\s*(?P<new_block>{)?\s*") class FileWriterError(IOError): """Exception raised if an error occurs in the definition @@ -191,15 +191,15 @@ class FortranWriterError(FileWriter.FileWriterError): pass # Parameters defining the output of the Fortran writer - keyword_pairs = {'^if.+then\s*$': ('^endif', 2), - '^type(?!\s*\()\s*.+\s*$': ('^endtype', 2), - '^do(?!\s+\d+)\s+': ('^enddo\s*$', 2), - '^subroutine': ('^end\s*$', 0), - '^module': ('^end\s*$', 0), - 'function': ('^end\s*$', 0)} - single_indents = {'^else\s*$':-2, - '^else\s*if.+then\s*$':-2} - number_re = re.compile('^(?P<num>\d+)\s+(?P<rest>.*)') + keyword_pairs = {r'^if.+then\s*$': ('^endif', 2), + r'^type(?!\s*\()\s*.+\s*$': ('^endtype', 2), + r'^do(?!\s+\d+)\s+': (r'^enddo\s*$', 2), + '^subroutine': (r'^end\s*$', 0), + '^module': (r'^end\s*$', 0), + 'function': (r'^end\s*$', 0)} + single_indents = {r'^else\s*$':-2, + r'^else\s*if.+then\s*$':-2} + number_re = re.compile(r'^(?P<num>\d+)\s+(?P<rest>.*)') line_cont_char = '$' comment_char = 'c' uniformcase = True #force everyting to be lower/upper case @@ -212,7 +212,7 @@ class 
FortranWriterError(FileWriter.FileWriterError): # Private variables __indent = 0 __keyword_list = [] - __comment_pattern = re.compile(r"^(\s*#|c$|(c\s+([^=]|$))|cf2py|c\-\-|c\*\*|!)", re.IGNORECASE) + __comment_pattern = re.compile(r"^(\s*#|c\$|c$|(c\s+([^=]|$))|cf2py|c\-\-|c\*\*|\s*!|!\$)", re.IGNORECASE) __continuation_line = re.compile(r"(?: )[$&]") def write_line(self, line): @@ -424,26 +424,20 @@ def count_number_of_quotes(self, line): i = i + 1 return len(splitline)-1 - def remove_routine(self, text, fct_names, formatting=True): - """write the incoming text but fully removing the associate routine/function - text can be a path to a file, an iterator, a string - fct_names should be a list of functions to remove + @staticmethod + def get_routine(text, fct_names, call_back=None): + """ + get the fortran function from a fortran file """ - f77_type = ['real*8', 'integer', 'double precision', 'logical'] - pattern = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ + pattern = re.compile(r'^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ % {'type':'|'.join(f77_type)}, re.I) - + + if isinstance(text, str): + text = text.split('\n') + + to_write=False removed = [] - if isinstance(text, str): - if '\n' in text: - text = text.split('\n') - else: - text = open(text) - if isinstance(fct_names, str): - fct_names = [fct_names] - - to_write=True for line in text: fct = pattern.findall(line) if fct: @@ -451,22 +445,38 @@ def remove_routine(self, text, fct_names, formatting=True): to_write = False else: to_write = True - if to_write: - if formatting: - if line.endswith('\n'): - line = line[:-1] - self.writelines(line) - else: - if not line.endswith('\n'): - line = '%s\n' % line - super(FileWriter,self).writelines(line) + if call_back: + call_back(line) else: removed.append(line) - + return removed + + def remove_routine(self, text, fct_names, formatting=True): + """write the incoming text but fully removing the associate routine/function + text can be a path to a file, an iterator, a string + fct_names should be a list of functions to remove + """ + + def call_back(line): + if formatting: + if line.endswith('\n'): + line = line[:-1] + self.writelines(line) + else: + if not line.endswith('\n'): + line = '%s\n' % line + super(FileWriter,self).writelines(line) + + return self.get_routine(text, fct_names, call_back) + +class FortranWriter90(FortranWriter): + + comment_char = ' !' 
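+    # free-form Fortran 90 comments start with '!', so e.g. a hypothetical
+    #   FortranWriter90('out.f90').write_comments('compute helicity sum')
+    # would emit " ! compute helicity sum" instead of the fixed-form "c ..." style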
+ #=============================================================================== # CPPWriter #=============================================================================== @@ -497,50 +507,50 @@ class CPPWriterError(FileWriter.FileWriterError): '^private': standard_indent, '^protected': standard_indent} - spacing_patterns = [ - ('\s*\"\s*}', '\"'), - ('\s*,\s*', ', '), - ('\s*-\s*', ' - '), - ('([{(,=])\s*-\s*', '\g<1> -'), - ('(return)\s*-\s*', '\g<1> -'), - ('\s*\+\s*', ' + '), - ('([{(,=])\s*\+\s*', '\g<1> +'), - ('\(\s*', '('), - ('\s*\)', ')'), - ('\{\s*', '{'), - ('\s*\}', '}'), - ('\s*=\s*', ' = '), - ('\s*>\s*', ' > '), - ('\s*<\s*', ' < '), - ('\s*!\s*', ' !'), - ('\s*/\s*', '/'), - ('\s*\*\s*', ' * '), - ('\s*-\s+-\s*', '-- '), - ('\s*\+\s+\+\s*', '++ '), - ('\s*-\s+=\s*', ' -= '), - ('\s*\+\s+=\s*', ' += '), - ('\s*\*\s+=\s*', ' *= '), - ('\s*/=\s*', ' /= '), - ('\s*>\s+>\s*', ' >> '), - ('<\s*double\s*>>\s*', ' > '), - ('\s*<\s+<\s*', ' << '), - ('\s*-\s+>\s*', '->'), - ('\s*=\s+=\s*', ' == '), - ('\s*!\s+=\s*', ' != '), - ('\s*>\s+=\s*', ' >= '), - ('\s*<\s+=\s*', ' <= '), - ('\s*&&\s*', ' && '), - ('\s*\|\|\s*', ' || '), - ('\s*{\s*}', ' {}'), - ('\s*;\s*', '; '), - (';\s*\}', ';}'), - (';\s*$}', ';'), - ('\s*<\s*([a-zA-Z0-9]+?)\s*>', '<\g<1>>'), - ('^#include\s*<\s*(.*?)\s*>', '#include <\g<1>>'), - ('(\d+\.{0,1}\d*|\.\d+)\s*[eE]\s*([+-]{0,1})\s*(\d+)', - '\g<1>e\g<2>\g<3>'), - ('\s+',' '), - ('^\s*#','#')] + spacing_patterns = [(r'\s*\"\s*}', '\"'), + (r'\s*,\s*', ', '), + (r'\s*-\s*', ' - '), + (r'([{(,=])\s*-\s*', r'\g<1> -'), + (r'(return)\s*-\s*', r'\g<1> -'), + (r'\s*\+\s*', ' + '), + (r'([{(,=])\s*\+\s*', r'\g<1> +'), + (r'\(\s*', '('), + (r'\s*\)', ')'), + (r'\{\s*', '{'), + (r'\s*\}', '}'), + (r'\s*=\s*', ' = '), + (r'\s*>\s*', ' > '), + (r'\s*<\s*', ' < '), + (r'\s*!\s*', ' !'), + (r'\s*/\s*', '/'), + (r'\s*\*\s*', ' * '), + (r'\s*-\s+-\s*', '-- '), + (r'\s*\+\s+\+\s*', '++ '), + (r'\s*-\s+=\s*', ' -= '), + (r'\s*\+\s+=\s*', ' += '), + (r'\s*\*\s+=\s*', ' *= '), + (r'\s*/=\s*', ' /= '), + (r'\s*>\s+>\s*', ' >> '), + (r'<\s*double\s*>>\s*', ' > '), + (r'\s*<\s+<\s*', ' << '), + (r'\s*-\s+>\s*', '->'), + (r'\s*=\s+=\s*', ' == '), + (r'\s*!\s+=\s*', ' != '), + (r'\s*>\s+=\s*', ' >= '), + (r'\s*<\s+=\s*', ' <= '), + (r'\s*&&\s*', ' && '), + (r'\s*\|\|\s*', ' || '), + (r'\s*{\s*}', ' {}'), + (r'\s*;\s*', '; '), + (r';\s*\}', ';}'), + (r';\s*$}', ';'), + (r'\s*<\s*([a-zA-Z0-9]+?)\s*>', r'<\g<1>>'), + (r'^#include\s*<\s*(.*?)\s*>', r'#include <\g<1>>'), + (r'(\d+\.{0,1}\d*|\.\d+)\s*[eE]\s*([+-]{0,1})\s*(\d+)', - '\g<1>e\g<2>\g<3>'), + r'\g<1>e\g<2>\g<3>'), + (r'\s+',' '), + (r'^\s*#','#')] + spacing_re = dict([(key[0], re.compile(key[0])) for key in \ spacing_patterns]) diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/gen_crossxhtml.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/gen_crossxhtml.py index d58ec573bc..681bf9d09b 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/gen_crossxhtml.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/gen_crossxhtml.py @@ -648,7 +648,7 @@ def recreate(self, banner): if run_card['ickkw'] != 0: #parse the file to have back the information pythia_log = misc.BackRead(pjoin(path, '%s_pythia.log' % tag)) - pythiare = re.compile("\s*I\s+0 All included subprocesses\s+I\s+(?P<generated>\d+)\s+(?P<tried>\d+)\s+I\s+(?P<xsec>[\d\.D\-+]+)\s+I") + pythiare = re.compile(r"\s*I\s+0 All included subprocesses\s+I\s+(?P<generated>\d+)\s+(?P<tried>\d+)\s+I\s+(?P<xsec>[\d\.D\-+]+)\s+I") for line in pythia_log: info = pythiare.search(line) if not info: @@ -1623,7 +1623,7 @@ def get_html(self, runresults): elif self.debug: text = str(self.debug).replace('. ','.<br>
') if 'http' in text: - pat = re.compile('(http[\S]*)') + pat = re.compile(r'(http[\S]*)') text = pat.sub(r' here ', text) debug = '
%s
%s
' % \ (self.debug.__class__.__name__, text) diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/gen_ximprove.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/gen_ximprove.py index c86da36a05..415ecc9de0 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/gen_ximprove.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/gen_ximprove.py @@ -158,8 +158,11 @@ def get_helicity(self, to_submit=True, clean=True): (stdout, _) = p.communicate(''.encode()) stdout = stdout.decode('ascii',errors='ignore') if stdout: - nb_channel = max([math.floor(float(d)) for d in stdout.split()]) + lines = stdout.strip().split('\n') + nb_channel = max([math.floor(float(d)) for d in lines[-1].split()]) else: + if os.path.exists(pjoin(self.me_dir, 'error')): + os.remove(pjoin(self.me_dir, 'error')) for matrix_file in misc.glob('matrix*orig.f', Pdir): files.cp(matrix_file, matrix_file.replace('orig','optim')) P_zero_result.append(Pdir) @@ -297,12 +300,14 @@ def get_helicity(self, to_submit=True, clean=True): bad_amps_perhel = [] if __debug__: mtext = open(matrix_file).read() - nb_amp = int(re.findall('PARAMETER \(NGRAPHS=(\d+)\)', mtext)[0]) - logger.debug('nb_hel: %s zero amp: %s bad_amps_hel: %s/%s', len(good_hels),len(bad_amps),len(bad_amps_perhel), len(good_hels)*nb_amp ) + nb_amp = int(re.findall(r'PARAMETER \(NGRAPHS=(\d+)\)', mtext)[0]) + logger.debug('(%s) nb_hel: %s zero amp: %s bad_amps_hel: %s/%s', split_file[-1], len(good_hels),len(bad_amps),len(bad_amps_perhel), len(good_hels)*nb_amp ) if len(good_hels) == 1: files.cp(matrix_file, matrix_file.replace('orig','optim')) continue # avoid optimization if onlye one helicity - recycler = hel_recycle.HelicityRecycler(good_hels, bad_amps, bad_amps_perhel) + + gauge = self.cmd.proc_characteristics['gauge'] + recycler = hel_recycle.HelicityRecycler(good_hels, bad_amps, bad_amps_perhel, gauge=gauge) # In case of bugs you can play around with these: recycler.hel_filt = self.run_card['hel_filtering'] recycler.amp_splt = self.run_card['hel_splitamp'] @@ -1299,7 +1304,7 @@ def get_job_for_event(self): 'script_name': 'unknown', 'directory': C.name, # need to be change for splitted job 'P_dir': C.parent_name, - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # needed for RO gridpack 'offset': 1, # need to be change for splitted job 'nevents': nevents, 'maxiter': self.max_iter, @@ -1387,7 +1392,7 @@ def create_ajob(self, template, jobs, write_dir=None): break info = jobs[j] info['script_name'] = 'ajob%i' % script_number - info['keeplog'] = 'false' + info['keeplog'] = 'false' if self.run_card['keep_log'] != 'debug' else 'true' if "base_directory" not in info: info["base_directory"] = "./" fsock.write(template_text % info) @@ -1456,7 +1461,7 @@ def get_job_for_precision(self): 'script_name': 'unknown', 'directory': C.name, # need to be change for splitted job 'P_dir': C.parent_name, - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # used for RO gridpack 'offset': 1, # need to be change for splitted job 'nevents': nevents, 'maxiter': self.max_iter, @@ -1916,7 +1921,7 @@ def get_job_for_event(self): 'directory': C.name, # need to be change for splitted job 'P_dir': os.path.basename(C.parent_name), 'offset': 1, # need to be change for splitted job - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # use for RO gridpack 'nevents': nevents, 
#int(nevents*self.gen_events_security)+1, 'maxiter': self.max_iter, 'miniter': self.min_iter, diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/hel_recycle.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/hel_recycle.py index dfa45d5d20..6c86611f68 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/hel_recycle.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/hel_recycle.py @@ -383,7 +383,7 @@ def get_number(cls, *args): class HelicityRecycler(): '''Class for recycling helicity''' - def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[]): + def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[], gauge='U'): External.good_hel = [] External.nhel_lines = '' @@ -427,6 +427,7 @@ def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[]): self.all_hel = [] self.hel_filt = True + self.gauge = gauge def set_input(self, file): if 'born_matrix' in file: @@ -612,7 +613,7 @@ def unfold_helicities(self, line, nature): def apply_amps(self, line, new_objs): if self.amp_splt: - return split_amps(line, new_objs) + return split_amps(line, new_objs, gauge=self.gauge) else: return apply_args(line, [i.args for i in new_objs]) @@ -785,7 +786,7 @@ def apply_args(old_line, all_the_args): return ''.join(new_lines) -def split_amps(line, new_amps): +def split_amps(line, new_amps, gauge): if not new_amps: return '' fct = line.split('(',1)[0].split('_0')[0] @@ -841,34 +842,31 @@ def split_amps(line, new_amps): spin = fct.split(None,1)[1][to_remove] lines.append('%sP1N_%s(%s)' % (fct, to_remove+1, ', '.join(args))) - hel, iamp = re.findall('AMP\((\d+),(\d+)\)', amp_result)[0] + hel, iamp = re.findall(r'AMP\((\d+),(\d+)\)', amp_result)[0] hel_calculated.append(hel) #lines.append(' %(result)s = TMP(3) * W(3,%(w)s) + TMP(4) * W(4,%(w)s)+' # % {'result': amp_result, 'w': windex}) #lines.append(' & TMP(5) * W(5,%(w)s)+TMP(6) * W(6,%(w)s)' # % {'result': amp_result, 'w': windex}) - if spin in "VF": - lines.append(""" call CombineAmp(%(nb)i, - & (/%(hel_list)s/), - & (/%(w_list)s/), - & TMP, W, AMP(1,%(iamp)s))""" % - {'nb': len(sub_amps), - 'hel_list': ','.join(hel_calculated), - 'w_list': ','.join(windices), - 'iamp': iamp - }) + if spin == "F" or ( spin == "V" and gauge !='FD'): + suffix = '' elif spin == "S": - lines.append(""" call CombineAmpS(%(nb)i, - &(/%(hel_list)s/), - & (/%(w_list)s/), - & TMP, W, AMP(1,%(iamp)s))""" % - {'nb': len(sub_amps), - 'hel_list': ','.join(hel_calculated), - 'w_list': ','.join(windices), - 'iamp': iamp - }) + suffix = 'S' + elif spin == "V" and gauge == "FD": + suffix = "FD" else: - raise Exception("split amp are not supported for spin2 and 3/2") + raise Exception("split amp not supported for spin2, 3/2") + + lines.append(""" call CombineAmp%(suffix)s(%(nb)i, + & (/%(hel_list)s/), + & (/%(w_list)s/), + & TMP, W, AMP(1,%(iamp)s))""" % {'suffix':suffix, + 'nb': len(sub_amps), + 'hel_list': ','.join(hel_calculated), + 'w_list': ','.join(windices), + 'iamp': iamp + }) + #lines.append('') return '\n'.join(lines) diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/histograms.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/histograms.py index 98f22c4c3a..9931127f66 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/histograms.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/histograms.py @@ -632,34 +632,34 @@ class HwU(Histogram): # than necessary because the HwU standard allows for spaces from within # the name of a weight weight_header_re = re.compile( - '&\s*(?P(\S|(\s(?!\s*(&|$))))+)(\s(?!(&|$)))*') + r'&\s*(?P(\S|(\s(?!\s*(&|$))))+)(\s(?!(&|$)))*') # 
================================ # Histo weight specification RE's # ================================ # The start of a plot - histo_start_re = re.compile('^\s*\s*(?P\d+)\s*"\s*'+ - '(?P(\S|(\s(?!\s*")))+)\s*"\s*$') + histo_start_re = re.compile(r'^\s*\s*(?P\d+)\s*"\s*'+ + r'(?P(\S|(\s(?!\s*")))+)\s*"\s*$') # A given weight specifier - a_float_re = '[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' - histo_bin_weight_re = re.compile('(?P%s|NaN)'%a_float_re,re.IGNORECASE) - a_int_re = '[\+|-]?\d+' + a_float_re = r'[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' + histo_bin_weight_re = re.compile(r'(?P%s|NaN)'%a_float_re,re.IGNORECASE) + a_int_re = r'[\+|-]?\d+' # The end of a plot histo_end_re = re.compile(r'^\s*<\\histogram>\s*$') # A scale type of weight - weight_label_scale = re.compile('^\s*mur\s*=\s*(?P%s)'%a_float_re+\ - '\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) - weight_label_PDF = re.compile('^\s*PDF\s*=\s*(?P\d+)\s*$') - weight_label_PDF_XML = re.compile('^\s*pdfset\s*=\s*(?P\d+)\s*$') - weight_label_TMS = re.compile('^\s*TMS\s*=\s*(?P%s)\s*$'%a_float_re) - weight_label_alpsfact = re.compile('^\s*alpsfact\s*=\s*(?P%s)\s*$'%a_float_re, + weight_label_scale = re.compile(r'^\s*mur\s*=\s*(?P%s)'%a_float_re+\ + r'\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) + weight_label_PDF = re.compile(r'^\s*PDF\s*=\s*(?P\d+)\s*$') + weight_label_PDF_XML = re.compile(r'^\s*pdfset\s*=\s*(?P\d+)\s*$') + weight_label_TMS = re.compile(r'^\s*TMS\s*=\s*(?P%s)\s*$'%a_float_re) + weight_label_alpsfact = re.compile(r'^\s*alpsfact\s*=\s*(?P%s)\s*$'%a_float_re, re.IGNORECASE) - weight_label_scale_adv = re.compile('^\s*dyn\s*=\s*(?P%s)'%a_int_re+\ - '\s*mur\s*=\s*(?P%s)'%a_float_re+\ - '\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) - weight_label_PDF_adv = re.compile('^\s*PDF\s*=\s*(?P\d+)\s+(?P\S+)\s*$') + weight_label_scale_adv = re.compile(r'^\s*dyn\s*=\s*(?P%s)'%a_int_re+\ + r'\s*mur\s*=\s*(?P%s)'%a_float_re+\ + r'\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) + weight_label_PDF_adv = re.compile(r'^\s*PDF\s*=\s*(?P\d+)\s+(?P\S+)\s*$') class ParseError(MadGraph5Error): @@ -926,7 +926,7 @@ def get_HwU_source(self, print_header=True): res.append(' '.join('%+16.7e'%wgt for wgt in list(bin.boundaries))) res[-1] += ' '.join('%+16.7e'%bin.wgts[key] for key in self.bins.weight_labels if key not in ['central','stat_error']) - res.append('<\histogram>') + res.append(r'<\histogram>') return res def output(self, path=None, format='HwU', print_header=True): @@ -1149,6 +1149,8 @@ def parse_one_histo_from_stream(self, stream, all_weight_header, boundaries = [0.0,0.0] for j, weight in \ enumerate(HwU.histo_bin_weight_re.finditer(line_bin)): + if (j == len(weight_header)): + continue if j == len(all_weight_header): raise HwU.ParseError("There is more bin weights"+\ " specified than expected (%i)"%len(weight_header)) @@ -1803,7 +1805,7 @@ def parse_histos_from_PY8_XML_stream(self, stream, run_id=None, # Filter empty weights coming from the split weight_label_list = [wgt.strip() for wgt in str(selected_run_node.getAttribute('header')).split(';') if - not re.match('^\s*$',wgt)] + not re.match(r'^\s*$',wgt)] ordered_weight_label_list = [w for w in weight_label_list if w not\ in ['xmin','xmax']] # Remove potential repetition of identical weight labels @@ -1827,7 +1829,7 @@ def parse_histos_from_PY8_XML_stream(self, stream, run_id=None, all_weights = [] for wgt_position, wgt_label in \ enumerate(str(selected_run_node.getAttribute('header')).split(';')): - if not re.match('^\s*$',wgt_label) is None: + if not 
re.match(r'^\s*$',wgt_label) is None: continue all_weights.append({'POSITION':wgt_position}) for wgt_item in wgt_label.strip().split('_'): @@ -2714,7 +2716,7 @@ def ratio_no_correlations(wgtsA, wgtsB): # First the global gnuplot header for this histogram group global_header =\ -""" +r""" ################################################################################ ### Rendering of the plot titled '%(title)s' ################################################################################ @@ -2862,9 +2864,9 @@ def ratio_no_correlations(wgtsA, wgtsB): major_title = ', '.join(major_title) if not mu[0] in ['none',None]: - major_title += ', dynamical\_scale\_choice=%s'%mu[0] + major_title += r', dynamical\_scale\_choice=%s'%mu[0] if not pdf[0] in ['none',None]: - major_title += ', PDF=%s'%pdf[0].replace('_','\_') + major_title += ', PDF=%s'%pdf[0].replace('_',r'\_') # Do not show uncertainties for individual jet samples (unless first # or specified explicitely and uniquely) @@ -2937,7 +2939,7 @@ def ratio_no_correlations(wgtsA, wgtsB): plot_lines.append( "'%s' index %d using (($1+$2)/2):%d ls %d title '%s'"\ %(HwU_name,block_position+i,mu_var+3,color_index,\ -'%s dynamical\_scale\_choice=%s' % (title,mu[j]))) +r'%s dynamical\_scale\_choice=%s' % (title,mu[j]))) # And now PDF_variation if available if not PDF_var_pos is None: for j,PDF_var in enumerate(PDF_var_pos): @@ -2947,7 +2949,7 @@ def ratio_no_correlations(wgtsA, wgtsB): plot_lines.append( "'%s' index %d using (($1+$2)/2):%d ls %d title '%s'"\ %(HwU_name,block_position+i,PDF_var+3,color_index,\ -'%s PDF=%s' % (title,pdf[j].replace('_','\_')))) +'%s PDF=%s' % (title,pdf[j].replace('_',r'\_')))) # Now add the uncertainty lines, those not using a band so that they # are not covered by those using a band after we reverse plo_lines diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/launch_plugin.py index 7b10bedcef..9ec09eb71d 100644 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/launch_plugin.py @@ -98,6 +98,7 @@ def default_setup(self): self['vector_size'] = 16 # already setup in default class (just change value) self['aloha_flag'] = '--fast-math' self['matrix_flag'] = '-O3' + self['limhel'] = 0 self.display_block.append('simd') self.display_block.append('psoptim') diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/lhe_parser.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/lhe_parser.py index eee9ba4522..f6e47956cd 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/lhe_parser.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/lhe_parser.py @@ -7,6 +7,7 @@ import numbers import math import time +import copy import os import shutil import sys @@ -101,7 +102,7 @@ def __init__(self, line=None, event=None): self.rwgt = 0 return - + self.event = event if event is not None: self.event_id = len(event) #not yet in the event @@ -1066,6 +1067,8 @@ def define_init_banner(self, wgt, lha_strategy, proc_charac=None): #special case for 1>N init_information = run_card.get_banner_init_information() event = next(self) + if not len(event): #if parse-momenta was false we have to parse the first event + event = Event(str(event)) init_information["idbmup1"] = event[0].pdg init_information["ebmup1"] = event[0].mass init_information["idbmup2"] = 0 @@ -1149,6 +1152,7 @@ def initialize_unweighting(self, getwgt, trunc_error): nb_keep = max(20, int(nb_event*trunc_error*15)) new_wgt = new_wgt[-nb_keep:] if nb_event == 0: + misc.sprint(i,f) raise Exception 
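Most regex hunks in this patch, from file_writers.py down to the histograms.py block above, do nothing but add an r prefix to pattern literals. The motivation is compile-time hygiene rather than behaviour: escapes such as \d or \s are invalid string escapes, flagged by CPython with DeprecationWarning since 3.6 and with the more visible SyntaxWarning since 3.12, whereas a raw string passes the backslash through to the regex engine unchanged. A small illustrative check (standard CPython behaviour; the exact message text varies by version):

import warnings

old_src = r"import re; re.compile('\d+')"   # '\d' goes through the string-escape pass
new_src = r"import re; re.compile(r'\d+')"  # raw literal: backslash reaches re intact

with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter('always')
    compile(old_src, '<old>', 'exec')  # warns: invalid escape sequence '\d'
    compile(new_src, '<new>', 'exec')  # silent
# Both sources compile to the same pattern today; only the warning differs.
print([str(w.message) for w in caught])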
# store the information self.initial_nb_events[i] = nb_event @@ -1203,7 +1207,6 @@ def unweight(self, outputpath, get_wgt, **opts): (stop to write event when target is reached) """ - if isinstance(get_wgt, (str,six.text_type)): unwgt_name =get_wgt def get_wgt_multi(event): @@ -1483,12 +1486,12 @@ def reorder_mother_child(self): particle.mother2 -= 1 # re-call the function for the next potential change return self.reorder_mother_child() - - - - - + + + + + def parse_reweight(self): """Parse the re-weight information in order to return a dictionary {key: value}. If no group is define group should be '' """ @@ -1522,6 +1525,37 @@ def parse_nlo_weight(self, real_type=(1,11), threshold=None): threshold=threshold) return self.nloweight + def get_fks_pair(self, real_type=(1,11), threshold=None): + """ Gives the fks pair labels""" + start, stop = self.tag.find(''), self.tag.find('') + if start != -1 != stop: + text = self.tag[start+8:stop] + all_line = text.split('\n') + text = text.lower().replace('d','e') + all_line = text.split('\n') + for line in all_line: + data = line.split() + if len(data)>16: + wgt = OneNLOWeight(line, real_type=real_type) + return wgt.to_merge_pdg,wgt.nexternal + + def get_born_momenta(self,real_type=(1,11), threshold=None): + """ Gets the underlying n+1 body kinematics""" + start, stop = self.tag.find(''), self.tag.find('') + if start != -1 != stop: + text = self.tag[start+8:stop] + text = text.lower().replace('d','e') + all_line = text.split('\n') + for line in all_line: + data = line.split() + if len(data)>16: + wgt = OneNLOWeight(line, real_type=real_type) + nexternal = wgt.nexternal + real_momenta = all_line[2:2+nexternal] + return real_momenta + + + def rewrite_nlo_weight(self, wgt=None): """get the string associate to the weight""" @@ -1558,11 +1592,11 @@ def parse_lo_weight(self): return self.loweight if not hasattr(Event, 'loweight_pattern'): - Event.loweight_pattern = re.compile('''\s*(?P\d+)\s+(?P[\d.e+-]+)\s*\s*\n\s* - \s*(?P[\s\d.+-e]+)\s*\s*\n\s* - 1|2)["']?\>\s*(?P[\s\d.e+-]*)\s*\s*\n\s* - 1|2)["']?\>\s*(?P[\s\d.e+-]*)\s*\s*\n\s* - \s*(?P[\d.e+-]*)\s* + Event.loweight_pattern = re.compile('''\\s*(?P\\d+)\\s+(?P[\\d.e+-]+)\\s*\\s*\n\\s* + \\s*(?P[\\s\\d.+-e]+)\\s*\\s*\n\\s* + 1|2)["']?\\>\\s*(?P[\\s\\d.e+-]*)\\s*\\s*\n\\s* + 1|2)["']?\\>\\s*(?P[\\s\\d.e+-]*)\\s*\\s*\n\\s* + \\s*(?P[\\d.e+-]*)\\s* ''',re.X+re.I+re.M) start, stop = self.tag.find(''), self.tag.find('') @@ -1615,7 +1649,7 @@ def parse_matching_scale(self): self.matched_scale_data = [] - pattern = re.compile("") + pattern = re.compile(r"") data = re.split(pattern,self.tag) if len(data) == 1: return [] @@ -1623,7 +1657,7 @@ def parse_matching_scale(self): tmp = {} start,content, end = data self.tag = "%s%s" % (start, end) - pattern = re.compile("pt_clust_(\d*)=\"([\de+-.]*)\"") + pattern = re.compile("pt_clust_(\\d*)=\"([\\de+-.]*)\"") for id,value in pattern.findall(content): tmp[int(id)] = float(value) for i in range(1, len(self)+1): @@ -1647,7 +1681,7 @@ def parse_syscalc_info(self): return self.syscalc_data pattern = re.compile("|") - pattern2 = re.compile("<(?P[\w]*)(?:\s*(\w*)=[\"'](.*)[\"']\s*|\s*)>(.*)") + pattern2 = re.compile("<(?P[\\w]*)(?:\\s*(\\w*)=[\"'](.*)[\"']\\s*|\\s*)>(.*)") data = re.split(pattern,self.tag) if len(data) == 1: return [] @@ -1850,6 +1884,240 @@ def get_decay(self, pdg_code=0, event_id=None): return new_event + + def set_initial_mass_to_zero(self): + """set the masses of the initial particles to zero, by reshuffling the respective momenta + Works only in the 
**partonic** com frame, so the event must be boosted to such frame + before calling the function + """ + + if not misc.equal(self[0].px, 0) or not misc.equal(self[1].px, 0) or \ + not misc.equal(self[0].py, 0) or not misc.equal(self[1].py, 0) or \ + not misc.equal(self[0].pz, - self[1].pz, zero_limit=False): + misc.sprint(self[0]) + misc.sprint(self[1]) + raise Exception('momenta should be in the partonic center of mass frame') + + self[0].mass = 0. + self[1].mass = 0. + tot_E=0. + for ip,part in enumerate(self): + if part.status == 1 : + tot_E += part.E + if (self[0].pz > 0. and self[1].pz < 0): + self[0].set_momentum(FourMomentum([tot_E/2., 0., 0., tot_E/2.])) + self[1].set_momentum(FourMomentum([tot_E/2., 0., 0., -tot_E/2.])) + elif (self[0].pz < 0. and self[1].pz > 0): + self[0].set_momentum(FourMomentum([tot_E/2., 0., 0., -tot_E/2.])) + self[1].set_momentum(FourMomentum([tot_E/2., 0., 0., tot_E/2.])) + else: + logger.critical('ERROR: two incoming partons not back.-to-back') + + def set_final_jet_mass_to_zero(self): + """set the final light particle masses to zero + """ + + for ip,part in enumerate(self): + if ((abs(part.pid) <= 5) or (abs(part.pid) == 11) or (abs(part.pid) == 12)) and (part.status == 1): + part.mass = 0. + E_1_new = math.sqrt(part.mass**2 + part.px**2 + part.py**2 + part.pz**2) + part.set_momentum(FourMomentum([E_1_new, part.px, part.py, part.pz])) + + + + def merge_particles_kinematics(self, i,j, moth): + """Map to an underlying n-body kinematics for two given + particles i,j to be merged and a resulting moth""" + """ note! kinematics (and id) mapping only! """ + + recoil = True + fks_type = False + + if recoil and not fks_type: + if (i == moth[0].get('number')-1): + fks_i = i + fks_j = j + elif (j == moth[0].get('number')-1): + fks_i = j + fks_j = i + to_remove = fks_j + + merge_i = self[fks_i] + merge_j = self[fks_j] + + i_4mom = FourMomentum(merge_i) + j_4mom = FourMomentum(merge_j) + if (fks_i <= 1): + sign1 = -1.0 + else: + sign1 = 1.0 + mother_4mom = i_4mom + sign1*j_4mom + + new_event = copy.deepcopy(self) + + self[fks_i].pid = moth[0]['id'] + self[fks_i].set_momentum(mother_4mom) + + if fks_i <= 1: # initial-state recoil + new_p = FourMomentum() + for ip,part in enumerate(self): + if (ip != fks_i and ip != fks_j and ip >= 2): + new_p += part + + if fks_i == 0: + self[1].set_momentum(new_p - FourMomentum(self[0])) + elif fks_i == 1: + self[0].set_momentum(new_p - FourMomentum(self[1])) + + pz_1_new = self.recoil_eq(self[0],self[1]) + pz_2_new = self[0].pz + self[1].pz - pz_1_new + E_1_new = math.sqrt(self[0].mass**2 + self[0].px**2 + self[0].py**2 + pz_1_new **2) + E_2_new = math.sqrt(self[1].mass**2 + self[1].px**2 + self[1].py**2 + pz_2_new **2) + self[0].set_momentum(FourMomentum([E_1_new,self[0].px,self[0].py,pz_1_new])) + self[1].set_momentum(FourMomentum([E_2_new,self[1].px,self[1].py,pz_2_new])) + self.pop(to_remove) + + if fks_i > 1: # final-state recoil + + # Re-scale the energy of fks_i to make it on-shell + for ip,part in enumerate(self): + if (ip == fks_i): + part.E = math.sqrt(part.mass**2 + part.px**2 + part.py**2 + part.pz**2) + new_p.E = part.E + + # Find the overall energy in the final state + new_p.E = 0.0 + for ip,part in enumerate(self): + if (ip != fks_j and ip >= 2): + new_p.E += part.E + + # Use one of the initial states to absorb the energy change in the final state + self[1].set_momentum(FourMomentum([new_p.E-self[0].E,self[1].px,self[1].py,self[1].pz])) + + # Change the initial state pz and E + pz_1_new = 
self.recoil_eq(self[0],self[1]) + pz_2_new = self[0].pz + self[1].pz - pz_1_new + E_1_new = math.sqrt(self[0].mass**2 + self[0].px**2 + self[0].py**2 + pz_1_new **2) + E_2_new = math.sqrt(self[1].mass**2 + self[1].px**2 + self[1].py**2 + pz_2_new **2) + self[0].set_momentum(FourMomentum([E_1_new,self[0].px,self[0].py,pz_1_new])) + self[1].set_momentum(FourMomentum([E_2_new,self[1].px,self[1].py,pz_2_new])) + self.pop(to_remove) + + elif fks_type and not recoil: + ## Do it in a more FKS-style + if (i == moth[0].get('number')-1): + fks_i = i + fks_j = j + elif (j == moth[0].get('number')-1): + fks_i = j + fks_j = i + to_remove = fks_j + new_event = copy.copy(event) + + if fks_i <= 1: # initial-state recoil + + # First boost to partonic CM frame + q = FourMomentum(self[0])+FourMomentum(self[1]) + for ip,part in enumerate(self): + vec = FourMomentum(part) + self[ip].set_momentum(vec.zboost(pboost=q)) + + k_tot = FourMomentum([self[0].E+self[1].E-self[fks_j].E,self[0].px+self[1].px-self[fks_j].px,\ + self[0].py+self[1].py-self[fks_j].py,self[0].pz+self[1].pz-self[fks_j].pz]) + + final = FourMomentum([0,0,0,0]) + for ip,part in enumerate(self): + vec = FourMomentum([part.E,part.px,part.py,part.pz]) + if (ip != fks_i and ip != fks_j and ip >= 2): + final = final + vec + + s = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz])**2 + ksi = self[fks_j].E/(math.sqrt(s)/2.0) + y = self[fks_j].pz/self[fks_j].E + + self[0].pz = self[0].pz * math.sqrt(1.0-ksi)*math.sqrt((2.0-ksi*(1.0+y))/((2.0-ksi*(1.0-y)))) + self[0].E = math.sqrt(self[0].mass**2 + self[0].pz**2) + self[1].pz = self[1].pz * math.sqrt(1.0-ksi)*math.sqrt((2.0-ksi*(1.0-y))/((2.0-ksi*(1.0+y)))) + self[1].E = math.sqrt(self[1].mass**2 + self[1].pz**2) + + final = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz]) + + k_tot_1 = k_tot.zboost(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + k_tot_2 = k_tot_1.pt_boost(pboost=FourMomentum([k_tot_1.E,k_tot_1.px,k_tot_1.py,k_tot_1.pz])) + k_tot_3 = k_tot_2.zboost_inv(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + + for ip,part in enumerate(self): + if (ip >= 2): + vec = FourMomentum([part.E,part.px,part.py,part.pz]) + vec2 = vec.zboost(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + vec3 = vec2.pt_boost(pboost=FourMomentum([k_tot_1.E,k_tot_1.px,k_tot_1.py,k_tot_1.pz])) + vec_new = vec3.zboost_inv(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + self[ip].set_momentum(FourMomentum([vec_new.E,vec_new.px,vec_new.py,vec_new.pz])) + + self.pop(to_remove) + + else: # final-state recoil + q = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz]) + + for ip,part in enumerate(self): + vec = FourMomentum([part.E,part.px,part.py,part.pz]) + self[ip].set_momentum(vec.zboost(pboost=q)) + + q = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz]) + + k = FourMomentum([self[fks_i].E+self[fks_j].E,self[fks_i].px+self[fks_j].px,\ + self[fks_i].py+self[fks_j].py,self[fks_i].pz+self[fks_j].pz]) + + k_rec = FourMomentum([0,0,0,0]) + for ip,part in enumerate(self): + if ip >= 2 and ip != fks_i and ip != fks_j: # add only final-states to the recoil and not the FKS pair + k_rec = k_rec + FourMomentum([part.E,part.px,part.py,part.pz]) + + k_mom = math.sqrt(k_rec.px**2 + k_rec.py**2 + k_rec.pz**2) + beta = (q**2 - (k_rec.E+k_mom)**2)/(q**2 + (k_rec.E+k_mom)**2) + 
for ip,part in enumerate(self): + if ip >= 2 and ip != fks_i and ip != fks_j: + vec = FourMomentum([self[ip].E,self[ip].px,self[ip].py,self[ip].pz]) + self[ip].set_momentum(vec.boost_beta(beta,k_rec)) + if ip == fks_i: + self[ip].set_momentum(q - k_rec.boost_beta(beta,k_rec)) + self.pop(to_remove) + else: + logger.info('Error in Sudakov Born mapping: no recoil scheme found!') + + def recoil_eq(self,part1, part2): + """ In general, solves the equation + E1 + E2 = K + p1 + p2 = c + E1^2 - p1^2 = a + E2^2 - p2^2 = b + and returns p1 + """ + thresh = 1e-6 + import random + a = part1.mass**2 + part1.px**2 + part1.py**2 + b = part2.mass**2 + part2.px**2 + part2.py**2 + c = part1.pz + part2.pz + K = part1.E + part2.E + K2 = K**2 + sol1 = (-a*c + b*c + c**3 - c*K2 - math.sqrt(K2*(a**2 + (b + c**2 - K2)**2 - 2*a*(b - c**2 + K2))))/(2*(c**2-K2)) + sol2 = (-a*c + b*c + c**3 - c*K2 + math.sqrt(K2*(a**2 + (b + c**2 - K2)**2 - 2*a*(b - c**2 + K2))))/(2*(c**2-K2)) + + if abs(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2) - (math.sqrt(a+sol2**2) + math.sqrt(b+(c-sol2)**2))) > thresh: + logger.critical('Error in recoil_eq solver 1') + logger.critical(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2)) + logger.critical(math.sqrt(a+sol2**2) + math.sqrt(b+(c-sol2)**2)) + if abs(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2) - K) > thresh: + logger.critical('Error in recoil_eq solver 2') + logger.critical(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2)) + logger.critical(K) + return sol1 + + def boost(self, filter=None): """modify the current event to boost it according to the current filter""" if filter is None: @@ -1861,7 +2129,7 @@ def boost(self, filter=None): if list(filter(p)): pboost += p else: - pboost = FourMomentum(pboost) + pboost = FourMomentum(filter) # change sign of three-component due to helas convention pboost.px *=-1 @@ -1877,7 +2145,7 @@ def check(self): """check various property of the events""" # check that relative error is under control - threshold = 1e-6 + threshold = 1e-4 #1. Check that the 4-momenta are conserved E, px, py, pz = 0,0,0,0 @@ -1920,7 +2188,50 @@ def check(self): self.check_color_structure() #3. check mass - + + def check_kinematics_only(self): + """check various property of the events - only kinematics""" + + # check that relative error is under control + threshold = 1e-3 + + #1. 
Check that the 4-momenta are conserved + E, px, py, pz = 0,0,0,0 + absE, abspx, abspy, abspz = 0,0,0,0 + for particle in self: + coeff = 1 + if particle.status == -1: + coeff = -1 + elif particle.status != 1: + continue + E += coeff * particle.E + absE += abs(particle.E) + px += coeff * particle.px + py += coeff * particle.py + pz += coeff * particle.pz + abspx += abs(particle.px) + abspy += abs(particle.py) + abspz += abs(particle.pz) + # check mass + fourmass = FourMomentum(particle).mass + + if particle.mass and (abs(particle.mass) - fourmass)/ abs(particle.mass) > threshold: + logger.critical(self) + raise Exception( "Do not have correct mass lhe: %s momentum: %s (error at %s" % (particle.mass, fourmass, (abs(particle.mass) - fourmass)/ abs(particle.mass))) + + if abs(E/absE) > threshold: + logger.critical(self) + raise Exception("Do not conserve Energy %s, %s" % (E/absE, E)) + if abs(px/abspx) > threshold: + logger.critical(self) + raise Exception("Do not conserve Px %s, %s" % (px/abspx, px)) + if abs(py/abspy) > threshold: + logger.critical(self) + raise Exception("Do not conserve Py %s, %s" % (py/abspy, py)) + if abs(pz/abspz) > threshold: + logger.critical(self) + raise Exception("Do not conserve Pz %s, %s" % (pz/abspz, pz)) + def assign_scale_line(self, line, convert=True): """read the line corresponding to global event line @@ -2764,7 +3075,7 @@ def zboost(self, pboost=None, E=0, pz=0): if isinstance(pboost, FourMomentum): E = pboost.E pz = pboost.pz - + #beta = pz/E gamma = E / math.sqrt(E**2-pz**2) gammabeta = pz / math.sqrt(E**2-pz**2) @@ -2778,6 +3089,74 @@ def zboost(self, pboost=None, E=0, pz=0): out.pz = 0 return out + def zboost_inv(self, pboost=None, E=0, pz=0): + """Both momenta should be in the same frame. + The boost perform correspond to the boost required to set pboost at + rest (only z boost applied). + """ + if isinstance(pboost, FourMomentum): + E = pboost.E + pz = pboost.pz + + #beta = pz/E + gamma = E / math.sqrt(E**2-pz**2) + gammabeta = pz / math.sqrt(E**2-pz**2) + + out = FourMomentum([gamma*self.E + gammabeta*self.pz, + self.px, + self.py, + gamma*self.pz + gammabeta*self.E]) + + if abs(out.pz) < 1e-6 * out.E: + out.pz = 0 + return out + + + def pt_boost(self, pboost=None, E=0, pz=0): + """Both momenta should be in the same frame. + The boost perform correspond to the boost required to set pboost at + rest (only pT boost applied). 
+ """ + + if isinstance(pboost, FourMomentum): + E = pboost.E + px = pboost.px + py = pboost.py + mass = math.sqrt(E**2 - px**2 - py**2) + + betax = px/E + betay = py/E + beta = math.sqrt(betax**2+betay**2) + gamma = 1 / math.sqrt(1.0-beta**2) + + out = FourMomentum([gamma*self.E - gamma*betax*self.px - gamma*betay*self.py, + -gamma*betax*self.E + (1.0 + (gamma-1.0)*betax**2/(beta**2))*self.px + (gamma-1.0)*betax*betay/(beta**2)*self.py, + -gamma*betay*self.E + ((gamma-1.0)*betax*betay/(beta**2))*self.px + (1.0+(gamma-1.0)*(betay**2)/(beta**2))*self.py, + self.pz]) + + if abs(out.px) < 1e-6 * out.E: + out.px = 0 + if abs(out.py) < 1e-6 * out.E: + out.py = 0 + return out + + def boost_beta(self,beta,mom): + """ Boost along the three-momentum of mom with a boost of size beta""" + + unit = mom * (1.0/math.sqrt(mom.px**2+mom.py**2+mom.pz**2)) + beta_vec = beta*unit + bx = beta_vec.px + by = beta_vec.py + bz = beta_vec.pz + gamma = 1.0 / math.sqrt(1.0-beta**2) + + out = FourMomentum([gamma*self.E - gamma*bx*self.px - gamma*by*self.py - gamma*bz*self.pz, + -gamma*bx*self.E + (1.0 + (gamma-1.0)*bx**2/(beta**2))*self.px + (gamma-1.0)*bx*by/(beta**2)*self.py + (gamma-1.0)*bx*bz/(beta**2)*self.pz, + -gamma*by*self.E + ((gamma-1.0)*bx*by/(beta**2))*self.px + (1.0+(gamma-1.0)*(by**2)/(beta**2))*self.py + (gamma-1.0)*by*bz/(beta**2)*self.pz, + -gamma*bz*self.E + (gamma-1.0)*bx*bz/(beta**2)*self.px + (gamma-1.0)*(by*bz)/(beta**2)*self.py + (1.0+(gamma-1.0)*bz**2/(beta**2))*self.pz]) + + return out + def boost_to_restframe(self, pboost): """apply the boost transformation such that pboost is at rest in the new frame. First apply a rotation to allign the pboost to the z axis and then use @@ -2789,27 +3168,64 @@ def boost_to_restframe(self, pboost): return out - # write pboost as (E, p cosT sinF, p sinT sinF, p cosF) - # rotation such that it become (E, 0 , 0 , p ) is - # cosT sinF , -sinT , cosT sinF - # sinT cosF , cosT , sinT sinF - # -sinT , 0 , cosF - p = math.sqrt( pboost.px**2 + pboost.py**2+ pboost.pz**2) - cosF = pboost.pz / p - sinF = math.sqrt(1-cosF**2) - sinT = pboost.py/p/sinF - cosT = pboost.px/p/sinF - - out=FourMomentum([self.E, - self.px*cosT*cosF + self.py*sinT*cosF-self.pz*sinF, - -self.px*sinT+ self.py*cosT, - self.px*cosT*sinF + self.py*sinT*sinF + self.pz*cosF - ]) - out = out.zboost(E=pboost.E,pz=p) + # see here https://physics.stackexchange.com/questions/749036/general-lorentz-boost-of-four-momentum-in-cm-frame-particle-physics + vx = pboost.px/pboost.E + vy = pboost.py/pboost.E + vz = pboost.pz/pboost.E + v = pboost.norm/pboost.E + v2 = pboost.norm_sq/pboost.E**2 + gamma = 1./math.sqrt(1.-v**2) + gammo = gamma-1. 
+ out = FourMomentum(E = gamma*(self.E - vx*self.px - vy*self.py - vz*self.pz), + px= -gamma*vx*self.E + (1+gammo*vx**2/v2)*self.px + gammo*vx*vy/v2*self.py + gammo*vx*vz/v2*self.pz, + py= -gamma*vy*self.E + gammo*vy*vx/v2*self.px + (1+gammo*vy**2/v2)*self.py + gammo*vy*vz/v2*self.pz, + pz= -gamma*vz*self.E + gammo*vz*vx/v2*self.px + gammo*vz*vy/v2*self.py + (1+gammo*vz**2/v2)*self.pz) + return out + def rotate_to_z(self,prot): + + import math + import numpy as np + + z = np.array([0.,0.,1.]) + + px = self.px + py = self.py + pz = self.pz + + refx = prot.px + refy = prot.py + refz = prot.pz + + prot_mom = np.array([px, py, pz]) + ref_mom = np.array([refx, refy, refz]) + + # Create normal vector + n = np.array([refy, -refx, 0.]) + n = n * 1./math.sqrt(self.threedot(n,n)) + t = prot_mom - self.threedot(n,prot_mom)*n + p = ref_mom - self.threedot(ref_mom,z)*z + p = p/math.sqrt(self.threedot(p,p)) + + t_pz = np.array([self.threedot(t,p), self.threedot(t,z), 0.]) + costheta = self.threedot(ref_mom,z)* 1./math.sqrt(self.threedot(ref_mom, ref_mom)) + sintheta=math.sqrt(1.-costheta**2) + + sgn = 1. + t_pz_p = np.array([0., 0., 0.]) + t_pz_p[0] = costheta*t_pz[0] + sgn*(-sintheta) * t_pz[1] + t_pz_p[1] = sgn*sintheta*t_pz[0] + costheta * t_pz[1] + + out_mom = self.threedot(n,prot_mom)*n + t_pz_p[0]*p + t_pz_p[1]*z + + out = FourMomentum([self.E,out_mom[0], out_mom[1], out_mom[2] ] ) + + return out - + def threedot(self,a,b): + + return a[0]*b[0]+a[1]*b[1]+a[2]*b[2] class OneNLOWeight(object): diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/madevent_interface.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/madevent_interface.py index 2a118e21bf..8e30cf690c 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/madevent_interface.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/madevent_interface.py @@ -496,7 +496,6 @@ def help_remove(self): logger.info(" the optional '-f' allows to by-pass all security question") logger.info(" The banner can be remove only if all files are removed first.") - class AskRun(cmd.ControlSwitch): """a class for the question on what to do on a madevent run""" @@ -2393,13 +2392,17 @@ def do_generate_events(self, line): # Check argument's validity mode = self.check_generate_events(args) switch_mode = self.ask_run_configuration(mode, args) - if not args: - # No run name assigned -> assigned one automaticaly - self.set_run_name(self.find_available_run_name(self.me_dir), None, 'parton') - else: - self.set_run_name(args[0], None, 'parton', True) - args.pop(0) - + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + run_card = banner_mod.RunCard(pjoin(self.me_dir, 'Cards', 'run_card.dat'), consistency=False) + if not run_card.scan_set: + if not args: + # No run name assigned -> assigned one automaticaly + self.set_run_name(self.find_available_run_name(self.me_dir), None, 'parton') + else: + self.set_run_name(args[0], None, 'parton', True) + args.pop(0) + + self.run_generate_events(switch_mode, args) self.postprocessing() @@ -2560,7 +2563,7 @@ def wait_monitoring(Idle, Running, Done): self.update_status("postprocessing contur done", level="rivet") # this decorator handle the loop related to scan. 
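The boost_to_restframe rewrite above trades the old rotate-to-z-then-boost construction for the closed-form general Lorentz boost, with velocity v = p/E of the boost vector and gammo = gamma - 1 (see the stackexchange link quoted in the hunk). A plain-tuple numeric sketch of the same formula, independent of the FourMomentum class, with the basic sanity check that a momentum boosted into its own rest frame keeps only its invariant mass as energy:

import math

def boost_to_restframe(p, q):
    """Boost four-vector p = (E, px, py, pz) into the rest frame of q.
    q must carry non-zero three-momentum (v2 > 0), as in the hunk."""
    E, px, py, pz = p
    qE, qx, qy, qz = q
    vx, vy, vz = qx / qE, qy / qE, qz / qE      # boost velocity
    v2 = vx * vx + vy * vy + vz * vz
    gamma = 1.0 / math.sqrt(1.0 - v2)
    gammo = gamma - 1.0
    pdotv = vx * px + vy * py + vz * pz
    shift = gammo * pdotv / v2 - gamma * E      # common factor of the space part
    return (gamma * (E - pdotv),
            px + shift * vx,
            py + shift * vy,
            pz + shift * vz)

q = (5.0, 1.0, 2.0, 3.0)                        # m = sqrt(25 - 14) = sqrt(11)
print(boost_to_restframe(q, q))                 # ~ (3.3166, 0.0, 0.0, 0.0)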
- @common_run.scanparamcardhandling() + @common_run.scanparamcardhandling(run_card_scan=True) def run_generate_events(self, switch_mode, args): if self.proc_characteristics['loop_induced'] and self.options['run_mode']==0: @@ -2593,7 +2596,6 @@ def run_generate_events(self, switch_mode, args): # Regular run mode logger.info('Generating %s events with run name %s' % (self.run_card['nevents'], self.run_name)) - self.exec_cmd('survey %s %s' % (self.run_name,' '.join(args)), postcmd=False) nb_event = self.run_card['nevents'] @@ -2975,7 +2977,7 @@ def update_width_in_param_card(decay_info, initial=None, output=None): particle = 0 # Read BRs for this decay line = param_card[line_number] - while re.search('^(#|\s|\d)', line): + while re.search(r'^(#|\s|\d)', line): line = param_card.pop(line_number) if not particle or line.startswith('#'): line=param_card[line_number] @@ -3226,7 +3228,7 @@ def do_treatcards(self, line, mode=None, opt=None): for line in open(pjoin(bias_module_path,'%s.f'%os.path.basename(bias_module_path))): if start and last: break - if not start and not re.search('c\s*parameters\s*=\s*{',line, re.I): + if not start and not re.search(r'c\s*parameters\s*=\s*{',line, re.I): continue start = True if not line.startswith('C'): @@ -3235,7 +3237,7 @@ def do_treatcards(self, line, mode=None, opt=None): if '{' in line: line = line.split('{')[-1] # split for } ! # - split_result = re.split('(\}|!|\#)', line,1, re.M) + split_result = re.split(r'(\}|!|\#)', line,1, re.M) line = split_result[0] sep = split_result[1] if len(split_result)>1 else None if sep == '}': @@ -3514,8 +3516,8 @@ def pass_in_difficult_integration_mode(self, rate=1): text = open(conf_path).read() min_evt, max_evt = 2500 *(2+rate), 10000*(rate+1) - text = re.sub('''\(min_events = \d+\)''', '(min_events = %i )' % min_evt, text) - text = re.sub('''\(max_events = \d+\)''', '(max_events = %i )' % max_evt, text) + text = re.sub(r'''\(min_events = \d+\)''', '(min_events = %i )' % min_evt, text) + text = re.sub(r'''\(max_events = \d+\)''', '(max_events = %i )' % max_evt, text) fsock = open(conf_path, 'w') fsock.write(text) fsock.close() @@ -3619,7 +3621,7 @@ def do_refine(self, line): alljobs = misc.glob('ajob*', Pdir) #remove associated results.dat (ensure to not mix with all data) - Gre = re.compile("\s*j=(G[\d\.\w]+)") + Gre = re.compile(r"\s*j=(G[\d\.\w]+)") for job in alljobs: Gdirs = Gre.findall(open(job).read()) for Gdir in Gdirs: @@ -3727,58 +3729,126 @@ def do_combine_events(self, line): sum_xsec, sum_xerru, sum_axsec = 0,[],0 Gdirs = self.get_Gdir() Gdirs.sort() - for Gdir in Gdirs: - if os.path.exists(pjoin(Gdir, 'events.lhe')): - result = sum_html.OneResult('') - result.read_results(pjoin(Gdir, 'results.dat')) - sum_xsec += result.get('xsec') - sum_xerru.append(result.get('xerru')) - sum_axsec += result.get('axsec') - - if self.run_card['gridpack'] or self.run_card['nevents']==0: - os.remove(pjoin(Gdir, 'events.lhe')) - continue + partials_info = [] + try: + p = subprocess.Popen(["ulimit", "-n"], stdout=subprocess.PIPE) + out, err = p.communicate() + max_G = out.decode() + if max_G == "unlimited": + max_G =2500 + else: + max_G = int(max_G) - 40 + except Exception as error: + logger.debug(error) + max_G = 80 # max(20, len(Gdirs)/self.options['nb_core']) - AllEvent.add(pjoin(Gdir, 'events.lhe'), - result.get('xsec'), - result.get('xerru'), - result.get('axsec') - ) - - if len(AllEvent) >= 80: #perform a partial unweighting - AllEvent.unweight(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % partials), - 
get_wgt, log_level=5, trunc_error=1e-2, event_target=self.run_card['nevents']) - AllEvent = lhe_parser.MultiEventFile() - AllEvent.banner = self.banner - AllEvent.add(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % partials), - sum_xsec, - math.sqrt(sum(x**2 for x in sum_xerru)), - sum_axsec) - partials +=1 - if not hasattr(self,'proc_characteristic'): self.proc_characteristic = self.get_characteristics() - if len(AllEvent) == 0: - nb_event = 0 - else: + mycluster = cluster.MultiCore(nb_core=self.options['nb_core']) + + def split(a, n): + """split a list "a" into n chunk of same size (or nearly same size)""" + k, m = divmod(len(a), n) + return (a[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n)) + + partials_info = [] + if len(Gdirs) >= max_G: + start_unweight= time.perf_counter() + # first check in how many chunk we have to split (always use a multiple of nb_core) + nb_split = 1 + nb_G = len(Gdirs) // (2* self.options['nb_core']) + while nb_G > min(80, max_G): + nb_split += 1 + nb_G = len(Gdirs)//(nb_split*2*self.options['nb_core']) + if nb_G < 10: + nb_split -=1 + nb_G = len(Gdirs)//(nb_split*2*self.options['nb_core']) + + #enforce at least 10 directory per thread + if nb_G > 10 or nb_split>1: + # do the unweighting of each chunk on their own thread + nb_chunk = (nb_split*2*self.options['nb_core']) + else: + nb_chunk = len(Gdirs) // 10 + nb_G =10 + + # security that the number of combine events is too large + if nb_chunk >= max_G: + nb_chunk = max_G -1 + nb_G = len(Gdirs) // nb_chunk + + for i, local_G in enumerate(split(Gdirs, nb_chunk)): + line = [pjoin(self.me_dir, "Events", self.run_name, "partials%d.lhe.gz" % i)] + line.append(pjoin(self.me_dir, 'Events', self.run_name, '%s_%s_banner.txt' % (self.run_name, tag))) + line.append(str(self.results.current['cross'])) + line += local_G + partials_info.append(self.do_combine_events_partial(' '.join(line), preprocess_only=True)) + mycluster.submit(sys.executable, + [pjoin(self.me_dir, 'bin', 'internal', 'madevent_interface.py'), 'combine_events_partial'] + line, + stdout='/dev/null' + ) + + starttime = time.time() + update_status = lambda idle, run, finish: \ + self.update_status((idle, run, finish, 'unweight'), level=None, + force=False, starttime=starttime) + mycluster.wait(self.me_dir, update_status) + # do the final combination + for data in partials_info: + AllEvent.add(*data) + + start_unweight= time.perf_counter() nb_event = AllEvent.unweight(pjoin(self.me_dir, "Events", self.run_name, "unweighted_events.lhe.gz"), get_wgt, trunc_error=1e-2, event_target=self.run_card['nevents'], log_level=logging.DEBUG, normalization=self.run_card['event_norm'], proc_charac=self.proc_characteristic) + + #cleaning + for data in partials_info: + path = data[0] + try: + os.remove(path) + except Exception as error: + try: + os.remove(path[:-3]) # try without the .gz + except: + misc.sprint('no file ', path, 'to clean') + else: + for Gdir in Gdirs: + if os.path.exists(pjoin(Gdir, 'events.lhe')): + result = sum_html.OneResult('') + result.read_results(pjoin(Gdir, 'results.dat')) + sum_xsec += result.get('xsec') + sum_xerru.append(result.get('xerru')) + sum_axsec += result.get('axsec') + + if self.run_card['gridpack'] or self.run_card['nevents']==0: + os.remove(pjoin(Gdir, 'events.lhe')) + continue + + AllEvent.add(pjoin(Gdir, 'events.lhe'), + result.get('xsec'), + result.get('xerru'), + result.get('axsec') + ) + + if len(AllEvent) == 0: + nb_event = 0 + else: + nb_event = AllEvent.unweight(pjoin(self.me_dir, "Events", self.run_name, 
"unweighted_events.lhe.gz"), + get_wgt, trunc_error=1e-2, event_target=self.run_card['nevents'], + log_level=logging.DEBUG, normalization=self.run_card['event_norm'], + proc_charac=self.proc_characteristic) + + if nb_event < self.run_card['nevents']: + logger.warning("failed to generate enough events. Please follow one of the following suggestions to fix the issue:") + logger.warning(" - set in the run_card.dat 'sde_strategy' to %s", 1 + self.run_card['sde_strategy'] % 2) + logger.warning(" - set in the run_card.dat 'hard_survey' to 1 or 2.") + logger.warning(" - reduce the number of requested events (if set too high)") + logger.warning(" - check that you do not have -integrable- singularity in your amplitude.") + logger.warning(" - regenerate your process directory by selecting another gauge (in particular try FD gauge).") - if nb_event < self.run_card['nevents']: - logger.warning("failed to generate enough events. Please follow one of the following suggestions to fix the issue:") - logger.warning(" - set in the run_card.dat 'sde_strategy' to %s", 1 + self.run_card['sde_strategy'] % 2) - logger.warning(" - set in the run_card.dat 'hard_survey' to 1 or 2.") - logger.warning(" - reduce the number of requested events (if set too high)") - logger.warning(" - check that you do not have -integrable- singularity in your amplitude.") - if partials: - for i in range(partials): - try: - os.remove(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % i)) - except Exception: - os.remove(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe" % i)) self.results.add_detail('nb_event', nb_event) @@ -3789,7 +3859,50 @@ def do_combine_events(self, line): logger.info("combination of events done in %s s ", time.time()-start) self.to_store.append('event') + + ############################################################################ + def do_combine_events_partial(self, line, preprocess_only=False): + """ """ + + AllEvent = lhe_parser.MultiEventFile() + + sum_xsec, sum_xerru, sum_axsec = 0,[],0 + output, banner_path,cross = line.split()[:3] + Gdirs = line.split()[3:] + + cross = float(cross) + if not self.banner: + self.banner = banner_mod.Banner(banner_path) + if not hasattr(self, 'run_card'): + self.run_card = banner_mod.RunCard(self.banner['mgruncard']) + AllEvent.banner = self.banner + + for Gdir in Gdirs: + if os.path.exists(pjoin(Gdir, 'events.lhe')): + result = sum_html.OneResult('') + result.read_results(pjoin(Gdir, 'results.dat')) + sum_xsec += result.get('xsec') + sum_xerru.append(result.get('xerru')) + sum_axsec += result.get('axsec') + + if self.run_card['gridpack'] or self.run_card['nevents']==0: + os.remove(pjoin(Gdir, 'events.lhe')) + continue + if not preprocess_only: + AllEvent.add(pjoin(Gdir, 'events.lhe'), + result.get('xsec'), + result.get('xerru'), + result.get('axsec') + ) + if preprocess_only: + return output, sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), sum_axsec + nb_event = max(min(abs(1.01*self.run_card['nevents']*sum_axsec/cross),self.run_card['nevents']), 10) + get_wgt = lambda event: event.wgt + AllEvent.unweight(output, + get_wgt, log_level=5, trunc_error=1e-2, event_target=nb_event) + return output, sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), sum_axsec + ############################################################################ def correct_bias(self): """check the first event and correct the weight by the bias @@ -3902,13 +4015,19 @@ def do_store_events(self, line): #except Exception: # continue # Store log - try: - if 
os.path.exists(pjoin(G_path, 'log.txt')): - input = pjoin(G_path, 'log.txt') + input = pjoin(G_path, 'log.txt') + if os.path.exists(input): + if self.run_card['keep_log'] not in ["none", "minimal"]: output = pjoin(G_path, '%s_log.txt' % run) - files.mv(input, output) - except Exception: - continue + try: + files.mv(input, output) + except Exception: + continue + elif self.run_card['keep_log'] == "none": + try: + os.remove(input) + except Exception: + continue #try: # # Grid # for name in ['ftn26']: @@ -3989,7 +4108,7 @@ def do_create_gridpack(self, line): misc.call(['./bin/internal/make_gridpack'], cwd=self.me_dir) files.mv(pjoin(self.me_dir, 'gridpack.tar.gz'), pjoin(self.me_dir, '%s_gridpack.tar.gz' % self.run_name)) - os.system("sed -i.bak \"s/\s*.true.*=.*GridRun/ .false. = GridRun/g\" %s/Cards/grid_card.dat" \ + os.system("sed -i.bak \"s/\\s*.true.*=.*GridRun/ .false. = GridRun/g\" %s/Cards/grid_card.dat" \ % self.me_dir) self.update_status('gridpack created', level='gridpack') @@ -4476,7 +4595,7 @@ def do_pythia8(self, line): else: preamble = misc.get_HEPTools_location_setter( pjoin(MG5DIR,'HEPTools'),'lib') - preamble += "\n unset PYTHIA8DATA\n" + #preamble += "\n unset PYTHIA8DATA\n" open(pythia_cmd_card,'w').write("""! ! It is possible to run this card manually with: @@ -4691,7 +4810,7 @@ def do_pythia8(self, line): # Make sure to sure the number of split_events determined during the splitting. split_PY8_Card.systemSet('Main:numberOfEvents',partition_for_PY8[i]) split_PY8_Card.systemSet('HEPMCoutput:scaling',split_PY8_Card['HEPMCoutput:scaling']* - (float(partition_for_PY8[i])/float(n_events))) + (float(partition_for_PY8[i]))) # Add_missing set to False so as to be sure not to add any additional parameter w.r.t # the ones in the original PY8 param_card copied. split_PY8_Card.write(pjoin(parallelization_dir,'PY8Card_%d.dat'%i), @@ -4963,9 +5082,9 @@ def wait_monitoring(Idle, Running, Done): if cross_sections: # Filter the cross_sections specified an keep only the ones # with central parameters and a different merging scale - a_float_re = '[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' + a_float_re = r'[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' central_merging_re = re.compile( - '^\s*Weight_MERGING\s*=\s*(?P%s)\s*$'%a_float_re, + r'^\s*Weight_MERGING\s*=\s*(?P%s)\s*$'%a_float_re, re.IGNORECASE) cross_sections = dict( (float(central_merging_re.match(xsec).group('merging')),value) @@ -5016,8 +5135,8 @@ def wait_monitoring(Idle, Running, Done): def parse_PY8_log_file(self, log_file_path): """ Parse a log file to extract number of event and cross-section. 
""" - pythiare = re.compile("Les Houches User Process\(es\)\s*\d+\s*\|\s*(?P\d+)\s*(?P\d+)\s*(?P\d+)\s*\|\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") - pythia_xsec_re = re.compile("Inclusive cross section\s*:\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") + pythiare = re.compile(r"Les Houches User Process\(es\)\s*\d+\s*\|\s*(?P\d+)\s*(?P\d+)\s*(?P\d+)\s*\|\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") + pythia_xsec_re = re.compile(r"Inclusive cross section\s*:\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") sigma_m, Nacc, Ntry = None, None, None for line in misc.BackRead(log_file_path): info = pythiare.search(line) @@ -5158,7 +5277,7 @@ def do_pythia(self, line): # read the line from the bottom of the file #pythia_log = misc.BackRead(pjoin(self.me_dir,'Events', self.run_name, # '%s_pythia.log' % tag)) - pythiare = re.compile("\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") + pythiare = re.compile(r"\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") for line in misc.reverse_readline(pjoin(self.me_dir,'Events', self.run_name, '%s_pythia.log' % tag)): info = pythiare.search(line) @@ -5619,8 +5738,8 @@ def launch_job(self,exe, cwd=None, stdout=None, argument = [], remaining=0, input_files.append(self.get_pdf_input_filename()) #Find the correct ajob - Gre = re.compile("\s*j=(G[\d\.\w]+)") - origre = re.compile("grid_directory=(G[\d\.\w]+)") + Gre = re.compile(r"\s*j=(G[\d\.\w]+)") + origre = re.compile(r"grid_directory=(G[\d\.\w]+)") try : fsock = open(exe) except Exception: @@ -5628,7 +5747,7 @@ def launch_job(self,exe, cwd=None, stdout=None, argument = [], remaining=0, text = fsock.read() output_files = Gre.findall(text) if not output_files: - Ire = re.compile("for i in ([\d\.\s]*) ; do") + Ire = re.compile(r"for i in ([\d\.\s]*) ; do") data = Ire.findall(text) data = ' '.join(data).split() for nb in data: @@ -6035,7 +6154,7 @@ def get_last_tag(self, level): 'syscalc':[], 'rivet':['rivet']} - if name == self.run_name: + if name and name == self.run_name: if reload_card: run_card = pjoin(self.me_dir, 'Cards','run_card.dat') self.run_card = banner_mod.RunCard(run_card) @@ -6334,7 +6453,7 @@ def run_syscalc(self, mode='parton', event_path=None, output=None): elif mode == 'Pythia': stdout = open(pjoin(event_dir, self.run_name, '%s_%s_syscalc.log' % (tag,mode)),'w') if 'mgpythiacard' in self.banner: - pat = re.compile('''^\s*qcut\s*=\s*([\+\-\d.e]*)''', re.M+re.I) + pat = re.compile(r'''^\s*qcut\s*=\s*([\+\-\d.e]*)''', re.M+re.I) data = pat.search(self.banner['mgpythiacard']) if data: qcut = float(data.group(1)) @@ -6611,7 +6730,7 @@ def get_subP_ids(path): for line in open(pjoin(path, 'leshouche.inc')): if not 'IDUP' in line: continue - particles = re.search("/([\d,-]+)/", line) + particles = re.search(r"/([\d,-]+)/", line) all_ids.append([int(p) for p in particles.group(1).split(',')]) return all_ids @@ -6899,6 +7018,7 @@ def do_combine_events(self, line): partials = 0 # if too many file make some partial unweighting sum_xsec, sum_xerru, sum_axsec = 0,[],0 + partials_info = [] Gdirs = self.get_Gdir() Gdirs.sort() for Gdir in Gdirs: @@ -6917,16 +7037,21 @@ def do_combine_events(self, line): sum_axsec += result.get('axsec')*gscalefact[Gdir] if len(AllEvent) >= 80: #perform a partial unweighting + nb_event = min(abs(1.01*self.nb_event*sum_axsec/self.results.current['cross']),self.run_card['nevents']) AllEvent.unweight(pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), - get_wgt, log_level=5, trunc_error=1e-2, event_target=self.nb_event) + 
get_wgt, log_level=5, trunc_error=1e-2, event_target=nb_event) AllEvent = lhe_parser.MultiEventFile() AllEvent.banner = self.banner - AllEvent.add(pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), + partials_info.append((pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), - sum_axsec) + sum_axsec) ) + sum_xsec, sum_xerru, sum_axsec = 0,[],0 partials +=1 + for data in partials_info: + AllEvent.add(*data) + if not hasattr(self,'proc_characteristic'): self.proc_characteristic = self.get_characteristics() diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/misc.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/misc.py index c4c669f36b..e7fd60be0d 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/misc.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/misc.py @@ -67,7 +67,7 @@ def parse_info_str(fsock): """ info_dict = {} - pattern = re.compile("(?P\w*)\s*=\s*(?P.*)", + pattern = re.compile(r"(?P\w*)\s*=\s*(?P.*)", re.IGNORECASE | re.VERBOSE) for entry in fsock: entry = entry.strip() @@ -84,7 +84,7 @@ def parse_info_str(fsock): def glob(name, path=''): """call to glob.glob with automatic security on path""" import glob as glob_module - path = re.sub('(?P\?|\*|\[|\])', '[\g]', path) + path = re.sub(r'(?P\?|\*|\[|\])', r'[\g]', path) return glob_module.glob(pjoin(path, name)) #=============================================================================== @@ -614,10 +614,10 @@ def mod_compilator(directory, new='gfortran', current=None, compiler_type='gfort #search file file_to_change=find_makefile_in_dir(directory) if compiler_type == 'gfortran': - comp_re = re.compile('^(\s*)FC\s*=\s*(.+)\s*$') + comp_re = re.compile(r'^(\s*)FC\s*=\s*(.+)\s*$') var = 'FC' elif compiler_type == 'cpp': - comp_re = re.compile('^(\s*)CXX\s*=\s*(.+)\s*$') + comp_re = re.compile(r'^(\s*)CXX\s*=\s*(.+)\s*$') var = 'CXX' else: MadGraph5Error, 'Unknown compiler type: %s' % compiler_type @@ -861,9 +861,9 @@ def detect_current_compiler(path, compiler_type='fortran'): # comp = re.compile("^\s*FC\s*=\s*(\w+)\s*") # The regular expression below allows for compiler definition with absolute path if compiler_type == 'fortran': - comp = re.compile("^\s*FC\s*=\s*([\w\/\\.\-]+)\s*") + comp = re.compile("^\\s*FC\\s*=\\s*([\\w\\/\\.\\-]+)\\s*") elif compiler_type == 'cpp': - comp = re.compile("^\s*CXX\s*=\s*([\w\/\\.\-]+)\s*") + comp = re.compile("^\\s*CXX\\s*=\\s*([\\w\\/\\.\\-]+)\\s*") else: MadGraph5Error, 'Unknown compiler type: %s' % compiler_type @@ -1001,7 +1001,19 @@ def call_stdout(arg, *args, **opt): def copytree(src, dst, symlinks = False, ignore = None): if not os.path.exists(dst): os.makedirs(dst) - shutil.copystat(src, dst) + try: + shutil.copystat(src, dst) + except PermissionError: + if os.path.realpath(src).startswith('/cvmfs') and os.path.realpath(dst).startswith('/afs'): + # allowing missmatch from cvmfs to afs since sounds to not create issue --at least in general-- + logger.critical(f'Ignoring that we could not copy permissions from {src} to {dst}') + else: + logger.critical(f'Permission error detected from {src} to {dst}.\n'+\ + 'If you are using WSL with windows partition, please try using python3.12\n'+\ + 'or avoid moving your data from the WSL partition to the UNIX one') + # we do not have enough experience in WSL to allow it to get trough. + raise + lst = os.listdir(src) if ignore: excl = ignore(src, lst) @@ -1895,12 +1907,12 @@ class EasterEgg(object): May4_banner = "* _____ *\n" + \ "* ,-~\" \"~-. *\n" + \ "* * ,^ ___ ^. 
* *\n" + \ - "* * / .^ ^. \ * *\n" + \ + "* * / .^ ^. \\ * *\n" + \ "* * Y l o ! Y * *\n" + \ "* * l_ `.___.' _,[ * *\n" + \ "* * |^~\"--------------~\"\"^| * *\n" + \ "* * ! May the 4th ! * *\n" + \ - "* * \ / * *\n" + \ + "* * \\ / * *\n" + \ "* * ^. .^ * *\n" + \ "* * \"-.._____.,-\" * *\n" @@ -1909,13 +1921,13 @@ class EasterEgg(object): "* M::::::::::M M::::::::::M *\n" + \ "* M:::::::::::M M:::::::::::M (_)___ *\n" + \ "* M:::::::M::::M M::::M:::::::M | / __| *\n" + \ - "* M::::::M M::::M M::::M M::::::M | \__ \ *\n" + \ + "* M::::::M M::::M M::::M M::::::M | \\__ \\ *\n" + \ "* M::::::M M::::M::::M M::::::M |_|___/ *\n" + \ "* M::::::M M:::::::M M::::::M *\n" + \ "* M::::::M M:::::M M::::::M / _| ___ _ __ *\n" + \ - "* M::::::M MMMMM M::::::M | |_ / _ \| '__| *\n" + \ + "* M::::::M MMMMM M::::::M | |_ / _ \\| '__| *\n" + \ "* M::::::M M::::::M | _| (_) | | *\n" + \ - "* M::::::M M::::::M |_/\/\___/|_| *\n" + \ + "* M::::::M M::::::M |_/\\/\\___/|_| *\n" + \ "* M::::::M M::::::M *\n" + \ "* MMMMMMMM MMMMMMMM *\n" + \ "* *\n" + \ @@ -2233,39 +2245,51 @@ def import_python_lhapdf(lhapdfconfig): os.environ['LD_LIBRARY_PATH'] = lhapdf_libdir else: os.environ['LD_LIBRARY_PATH'] = '%s:%s' %(lhapdf_libdir,os.environ['LD_LIBRARY_PATH']) - try: candidates=[dirname for dirname in os.listdir(lhapdf_libdir) \ if os.path.isdir(os.path.join(lhapdf_libdir,dirname))] except OSError: candidates=[] + if os.path.isdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')): + candidates += [pjoin(os.pardir,'local', 'lib', dirname) for dirname in os.listdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')) + if os.path.isdir(os.path.join(lhapdf_libdir,os.pardir, 'local', 'lib', dirname))] for candidate in candidates: - if os.path.isdir(os.path.join(lhapdf_libdir,candidate,'site-packages')): - sys.path.insert(0,os.path.join(lhapdf_libdir,candidate,'site-packages')) - try: - import lhapdf - use_lhapdf=True - break - except ImportError: - sys.path.pop(0) - continue + for subdir in ['site-packages', 'dist-packages']: + if os.path.isdir(os.path.join(lhapdf_libdir,candidate, subdir)): + sys.path.insert(0,os.path.join(lhapdf_libdir,candidate, subdir)) + try: + import lhapdf + use_lhapdf=True + break + except ImportError as error: + sys.path.pop(0) + continue + else: + continue + break if not use_lhapdf: try: candidates=[dirname for dirname in os.listdir(lhapdf_libdir+'64') \ if os.path.isdir(os.path.join(lhapdf_libdir+'64',dirname))] except OSError: candidates=[] - + if os.path.isdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib64')): + candidates += [pjoin(os.pardir,'local', 'lib64', dirname) for dirname in os.listdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')) + if os.path.isdir(os.path.join(lhapdf_libdir,os.pardir, 'local', 'lib64', dirname))] for candidate in candidates: - if os.path.isdir(os.path.join(lhapdf_libdir+'64',candidate,'site-packages')): - sys.path.insert(0,os.path.join(lhapdf_libdir+'64',candidate,'site-packages')) - try: - import lhapdf - use_lhapdf=True - break - except ImportError: - sys.path.pop(0) - continue + for subdir in ['site-packages', 'dist-packages']: + if os.path.isdir(os.path.join(lhapdf_libdir+'64',candidate, subdir)): + sys.path.insert(0,os.path.join(lhapdf_libdir+'64',candidate, subdir)) + try: + import lhapdf + use_lhapdf=True + break + except ImportError as error: + sys.path.pop(0) + continue + else: + continue + break if not use_lhapdf: try: import lhapdf diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/shower_card.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/shower_card.py 
index e87d534177..16ed72b1a0 100755 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/shower_card.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/shower_card.py @@ -288,7 +288,7 @@ def write(self, output_file, template=None, python_template=False, self.text = open(template,'r').read() - key_re = re.compile('^(\s*)([\S^#]+)(\s*)=(\s*)([^#]*?)(\s*)(\#.*|$)') + key_re = re.compile(r'^(\s*)([\S^#]+)(\s*)=(\s*)([^#]*?)(\s*)(\#.*|$)') newlines = [] for line in self.text.split('\n'): key_match = key_re.findall(line) diff --git a/epochX/cudacpp/gg_ttg.mad/bin/internal/systematics.py b/epochX/cudacpp/gg_ttg.mad/bin/internal/systematics.py index 28eaed00e2..2acebd087c 100644 --- a/epochX/cudacpp/gg_ttg.mad/bin/internal/systematics.py +++ b/epochX/cudacpp/gg_ttg.mad/bin/internal/systematics.py @@ -582,7 +582,7 @@ def print_cross_sections(self, all_cross, nb_event, stdout): else: resume.write( '#PDF %s: %g +%2.3g%% -%2.3g%%\n' % (pdfset.name, pdferr.central,pdferr.errplus*100/all_cross[0], pdferr.errminus*100/all_cross[0])) - dyn_name = {1: '\sum ET', 2:'\sum\sqrt{m^2+pt^2}', 3:'0.5 \sum\sqrt{m^2+pt^2}',4:'\sqrt{\hat s}' } + dyn_name = {1: r'\sum ET', 2:r'\sum\sqrt{m^2+pt^2}', 3:r'0.5 \sum\sqrt{m^2+pt^2}',4:r'\sqrt{\hat s}' } for key, curr in dyns.items(): if key ==-1: continue @@ -789,7 +789,7 @@ def get_id(self): return int(self.start_wgt_id) if 'initrwgt' in self.banner: - pattern = re.compile(' set lhapdf /PATH/TO/lhapdf-config -None does not seem to correspond to a valid lhapdf-config executable. -Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). -Note that you can still compile and run aMC@NLO with the built-in PDFs - MG5_aMC> set lhapdf /PATH/TO/lhapdf-config - Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +57,7 @@ generate g g > t t~ g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005768537521362305  +DEBUG: model prefixing takes 0.005707740783691406  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -159,13 +154,12 @@ INFO: Process has 16 diagrams Total: 1 processes with 16 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttg Load PLUGIN.CUDACPP_OUTPUT -Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.5.3_lo_vect. +Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT -DEBUG: cformat =  plugin [export_cpp.py at line 3070]  DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 165]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g WEIGHTED<=3 @1 INFO: Processing color information for process: g g > t t~ g @1 @@ -174,18 +168,18 @@ INFO: Processing color information for process: g g > t t~ g @1 DEBUG: type(fortran_model)= [output.py at line 214]  DEBUG: type(me)= me=0 [output.py at line 215]  DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 216]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. -Generated helas calls for 1 subprocesses (16 diagrams) in 0.039 s +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/SubProcesses/P1_Sigma_sm_gg_ttxg/. 
+Generated helas calls for 1 subprocesses (16 diagrams) in 0.038 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 set of routines with options: P0 ALOHA: aloha creates VVVV3 set of routines with options: P0 ALOHA: aloha creates VVVV4 set of routines with options: P0 -ALOHA: aloha creates 5 routines in 0.331 s +ALOHA: aloha creates 5 routines in 0.334 s VVV1 VVV1 FFV1 @@ -195,17 +189,17 @@ ALOHA: aloha creates 5 routines in 0.331 s VVVV1 VVVV3 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttg/src/. quit -real 0m1.054s -user 0m0.733s -sys 0m0.055s +real 0m0.791s +user 0m0.726s +sys 0m0.056s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt index 220b4c8811..65f27cc918 100644 --- a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt +++ b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_mad_gg_ttgg_log.txt @@ -14,7 +14,7 @@ Running MG5 in debug mode * * * * * * * * * * * * -* VERSION 3.5.3_lo_vect 2023-12-23 * +* VERSION 3.6.0_lo_vect 2024-06-17 * * * * WARNING: UNKNOWN DEVELOPMENT VERSION. * * WARNING: DO NOT USE FOR PRODUCTION * @@ -45,15 +45,10 @@ Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (inclu Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config -None does not seem to correspond to a valid lhapdf-config executable. -Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). -Note that you can still compile and run aMC@NLO with the built-in PDFs - MG5_aMC> set lhapdf /PATH/TO/lhapdf-config - Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +57,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005507946014404297  +DEBUG: model prefixing takes 0.005572319030761719  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,24 +150,24 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.158 s +1 processes with 123 diagrams generated in 0.164 s Total: 1 processes with 123 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttgg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT -Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.5.3_lo_vect. +Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT +DEBUG: opt['output_options']['vector_size'] =  32 [export_v4.py at line 4334]  Output will be done with PLUGIN: CUDACPP_OUTPUT -DEBUG: cformat =  standalone_simd [export_cpp.py at line 3070]  DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 165]  INFO: initialize a new directory: CODEGEN_mad_gg_ttgg INFO: remove old information in CODEGEN_mad_gg_ttgg DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 @@ -182,32 +177,28 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at line 711]  -DEBUG: subproc_number =  0 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses/P1_gg_ttxgg [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  105 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [model_handling.py at line 1548]  -Generated helas calls for 1 subprocesses (123 diagrams) in 0.430 s -Wrote files for 222 helas calls in 0.678 s +Generated helas calls for 1 subprocesses (123 diagrams) in 0.437 s +Wrote files for 222 helas calls in 0.682 s +DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.332 s +ALOHA: aloha creates 5 routines in 0.341 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates 
FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.316 s +ALOHA: aloha creates 10 routines in 0.322 s VVV1 VVV1 FFV1 @@ -220,38 +211,40 @@ ALOHA: aloha creates 10 routines in 0.316 s VVVV3 VVVV4 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common -patching file Source/genps.inc +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file SubProcesses/makefile -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses/P1_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/SubProcesses/P1_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -Hunk #2 succeeded at 281 (offset 48 lines). +Hunk #2 succeeded at 275 (offset 61 lines). 
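A note on the iconfig_to_diag / diag_to_iconfig DEBUG dump above: it encodes how the 123 diagrams of g g > t t~ g g are mapped onto 105 single-diagram integration channels. Diagrams whose config_map entry is 0 (presumably the topologies that cannot seed a channel, such as those built on four-point vertices) are skipped, and the second dictionary is simply the inverse of the first. A minimal Python sketch of that relation, using only the first few entries of the dump for illustration:

    # config_map as dumped above: index i corresponds to diagram i+1,
    # a value of 0 means "no integration channel for this diagram"
    config_map = [0, 1, 2, 3, 4]  # truncated for illustration
    iconfig_to_diag = {c: i + 1 for i, c in enumerate(config_map) if c != 0}
    diag_to_iconfig = {d: c for c, d in iconfig_to_diag.items()}
    assert iconfig_to_diag == {1: 2, 2: 3, 3: 4, 4: 5}  # matches the dump
    assert diag_to_iconfig == {2: 1, 3: 2, 4: 3, 5: 4}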
DEBUG: p.returncode =  0 [output.py at line 258]  -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/README +/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/README Run "open index.html" to see more information about this process. quit -real 0m3.834s -user 0m3.505s -sys 0m0.298s +real 0m3.904s +user 0m3.584s +sys 0m0.268s Code generation completed in 4 seconds ************************************************************ * * @@ -265,7 +258,7 @@ Code generation completed in 4 seconds * * * * * * * * * * * * -* VERSION 3.5.3_lo_vect * +* VERSION 3.6.0_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * @@ -273,9 +266,9 @@ Code generation completed in 4 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt @@ -295,7 +288,7 @@ launch in debug mode * * * * * * * * * * * * -* VERSION 3.5.3_lo_vect * +* VERSION 3.6.0_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * @@ -303,9 +296,9 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttgg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt diff --git a/epochX/cudacpp/gg_ttgg.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_ttgg.mad/Cards/me5_configuration.txt index cdeedc7863..4f5079f78a 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_ttgg.mad/Cards/me5_configuration.txt @@ -116,6 +116,7 @@ # cluster_type = condor # cluster_queue = madgraph # cluster_size = 150 +# cluster_walltime = # time in minute for slurm and second for condor (not supported for other scheduller) #! Path to a node directory to avoid direct writing on the central disk #! Note that condor clusters avoid direct writing by default (therefore this @@ -234,7 +235,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gg_ttgg.mad/Cards/param_card.dat b/epochX/cudacpp/gg_ttgg.mad/Cards/param_card.dat index caf4a67ea8..bb150ef10d 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Cards/param_card.dat +++ b/epochX/cudacpp/gg_ttgg.mad/Cards/param_card.dat @@ -1,5 +1,5 @@ ###################################################################### -## PARAM_CARD AUTOMATICALY GENERATED BY MG5 FOLLOWING UFO MODEL #### +## PARAM_CARD AUTOMATICALLY GENERATED BY MG5 FOLLOWING UFO MODEL #### ###################################################################### ## ## ## Width set on Auto will be computed following the information ## diff --git a/epochX/cudacpp/gg_ttgg.mad/Cards/param_card_default.dat b/epochX/cudacpp/gg_ttgg.mad/Cards/param_card_default.dat index caf4a67ea8..bb150ef10d 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Cards/param_card_default.dat +++ b/epochX/cudacpp/gg_ttgg.mad/Cards/param_card_default.dat @@ -1,5 +1,5 @@ ###################################################################### -## PARAM_CARD AUTOMATICALY GENERATED BY MG5 FOLLOWING UFO MODEL #### +## PARAM_CARD AUTOMATICALLY GENERATED BY MG5 FOLLOWING UFO MODEL #### ###################################################################### ## ## ## Width set on Auto will be computed following the information ## diff --git a/epochX/cudacpp/gg_ttgg.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_ttgg.mad/Cards/proc_card_mg5.dat index f4efb79920..d53bfad86b 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_ttgg.mad/Cards/proc_card_mg5.dat @@ -8,7 +8,7 @@ #* * * * #* * #* * -#* VERSION 3.5.3_lo_vect 2023-12-23 * +#* VERSION 3.6.0_lo_vect 2024-06-17 * #* * #* WARNING: UNKNOWN DEVELOPMENT VERSION. * #* WARNING: DO NOT USE FOR PRODUCTION * diff --git a/epochX/cudacpp/gg_ttgg.mad/Cards/reweight_card_default.dat b/epochX/cudacpp/gg_ttgg.mad/Cards/reweight_card_default.dat index ace534ae02..5cc65fe095 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Cards/reweight_card_default.dat +++ b/epochX/cudacpp/gg_ttgg.mad/Cards/reweight_card_default.dat @@ -19,8 +19,16 @@ change mode NLO # Define type of Reweighting. For LO sample this command # has no effect since only "LO" mode is allowed. 
+ +#uncomment if you do not want to overwrite the reweight file of Sudakov in rw_me +#change rwgt_dir /PATH_MG5_BRANCH/NAME_FOLDER + +#uncomment if you want to use Sudakov Reweight +#change include_sudakov True + launch + # SPECIFY A PATH OR USE THE SET COMMAND LIKE THIS: # set sminputs 1 130 # modify 1/alpha_EW diff --git a/epochX/cudacpp/gg_ttgg.mad/Cards/run_card.dat b/epochX/cudacpp/gg_ttgg.mad/Cards/run_card.dat index a1e27dfa2b..ecdc7fd25c 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Cards/run_card.dat +++ b/epochX/cudacpp/gg_ttgg.mad/Cards/run_card.dat @@ -185,6 +185,7 @@ systematics = systematics_program ! none, systematics [python], SysCalc [depreceted, C++] ['--mur=0.5,1,2', '--muf=0.5,1,2', '--pdf=errorset'] = systematics_arguments ! see: https://cp3.irmp.ucl.ac.be/projects/madgraph/wiki/Systematics#Systematicspythonmodule + #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ diff --git a/epochX/cudacpp/gg_ttgg.mad/Cards/run_card_default.dat b/epochX/cudacpp/gg_ttgg.mad/Cards/run_card_default.dat index b8ea7253a9..35153b7e3f 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Cards/run_card_default.dat +++ b/epochX/cudacpp/gg_ttgg.mad/Cards/run_card_default.dat @@ -185,6 +185,7 @@ systematics = systematics_program ! none, systematics [python], SysCalc [depreceted, C++] ['--mur=0.5,1,2', '--muf=0.5,1,2', '--pdf=errorset'] = systematics_arguments ! see: https://cp3.irmp.ucl.ac.be/projects/madgraph/wiki/Systematics#Systematicspythonmodule + #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ diff --git a/epochX/cudacpp/gg_ttgg.mad/MGMEVersion.txt b/epochX/cudacpp/gg_ttgg.mad/MGMEVersion.txt index 9d3a5c0ba0..a806d3938c 100644 --- a/epochX/cudacpp/gg_ttgg.mad/MGMEVersion.txt +++ b/epochX/cudacpp/gg_ttgg.mad/MGMEVersion.txt @@ -1 +1 @@ -3.5.3_lo_vect \ No newline at end of file +3.6.0_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/gg_ttgg.mad/Source/DHELAS/aloha_functions.f b/epochX/cudacpp/gg_ttgg.mad/Source/DHELAS/aloha_functions.f index 975725737f..47699fa614 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Source/DHELAS/aloha_functions.f +++ b/epochX/cudacpp/gg_ttgg.mad/Source/DHELAS/aloha_functions.f @@ -351,7 +351,7 @@ subroutine oxxxxx(p,fmass,nhel,nsf , fo) fo(6) = ip * sqm(abs(im)) else - pp = min(p(0),dsqrt(p(1)**2+p(2)**2+p(3)**2)) +c pp = min(p(0),dsqrt(p(1)**2+p(2)**2+p(3)**2)) sf(1) = dble(1+nsf+(1-nsf)*nh)*rHalf sf(2) = dble(1+nsf-(1-nsf)*nh)*rHalf omega(1) = dsqrt(p(0)+pp) @@ -2054,7 +2054,7 @@ subroutine CombineAmp(nb, ihels, iwfcts, W1, Wall, Amp) enddo return end - + subroutine CombineAmpS(nb, ihels, iwfcts, W1, Wall, Amp) integer nb ! size of the vectors diff --git a/epochX/cudacpp/gg_ttgg.mad/Source/MODEL/couplings.f b/epochX/cudacpp/gg_ttgg.mad/Source/MODEL/couplings.f index f3b620ab58..04d6bb5333 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Source/MODEL/couplings.f +++ b/epochX/cudacpp/gg_ttgg.mad/Source/MODEL/couplings.f @@ -10,18 +10,27 @@ SUBROUTINE COUP() PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + LOGICAL UPDATELOOP COMMON /TO_UPDATELOOP/UPDATELOOP INCLUDE 'input.inc' - INCLUDE '../vector.inc' + + INCLUDE 'coupl.inc' READLHA = .TRUE. 
INCLUDE 'intparam_definition.inc' CALL COUP1() + IF (UPDATELOOP) THEN + + CALL COUP2() + + ENDIF + C couplings needed to be evaluated points by points C - CALL COUP2(1) + CALL COUP3(1) RETURN END @@ -47,7 +56,11 @@ SUBROUTINE UPDATE_AS_PARAM(VECID) INCLUDE '../maxparticles.inc' INCLUDE '../cuts.inc' + + INCLUDE '../vector.inc' + + INCLUDE '../run.inc' DOUBLE PRECISION ALPHAS @@ -65,7 +78,7 @@ SUBROUTINE UPDATE_AS_PARAM(VECID) couplings needed to be evaluated points by points C ALL_G(VECID) = G - CALL COUP2(VECID) + CALL COUP3(VECID) RETURN END @@ -80,7 +93,9 @@ SUBROUTINE UPDATE_AS_PARAM2(MU_R2,AS2 ,VECID) INTEGER VECID INCLUDE 'model_functions.inc' INCLUDE 'input.inc' + INCLUDE '../vector.inc' + INCLUDE 'coupl.inc' DOUBLE PRECISION MODEL_SCALE COMMON /MODEL_SCALE/MODEL_SCALE diff --git a/epochX/cudacpp/gg_ttgg.mad/Source/MODEL/couplings1.f b/epochX/cudacpp/gg_ttgg.mad/Source/MODEL/couplings1.f index e14f3a1770..72cfa0f6e4 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Source/MODEL/couplings1.f +++ b/epochX/cudacpp/gg_ttgg.mad/Source/MODEL/couplings1.f @@ -7,11 +7,12 @@ SUBROUTINE COUP1( ) IMPLICIT NONE INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' END diff --git a/epochX/cudacpp/gg_ttgg.mad/Source/MODEL/couplings2.f b/epochX/cudacpp/gg_ttgg.mad/Source/MODEL/couplings2.f index e638b28035..30f3a04e3b 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Source/MODEL/couplings2.f +++ b/epochX/cudacpp/gg_ttgg.mad/Source/MODEL/couplings2.f @@ -2,19 +2,17 @@ c written by the UFO converter ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc - SUBROUTINE COUP2( VECID) + SUBROUTINE COUP2( ) IMPLICIT NONE - INTEGER VECID + INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' - GC_10(VECID) = -G - GC_11(VECID) = MDL_COMPLEXI*G - GC_12(VECID) = MDL_COMPLEXI*MDL_G__EXP__2 END diff --git a/epochX/cudacpp/gg_ttgg.mad/Source/MODEL/couplings3.f b/epochX/cudacpp/gg_ttgg.mad/Source/MODEL/couplings3.f index f537dd3764..ad696f2865 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Source/MODEL/couplings3.f +++ b/epochX/cudacpp/gg_ttgg.mad/Source/MODEL/couplings3.f @@ -7,12 +7,13 @@ SUBROUTINE COUP3( VECID) IMPLICIT NONE INTEGER VECID INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' GC_10(VECID) = -G GC_11(VECID) = MDL_COMPLEXI*G diff --git a/epochX/cudacpp/gg_ttgg.mad/Source/MODEL/makefile b/epochX/cudacpp/gg_ttgg.mad/Source/MODEL/makefile index 0b24445a53..5a5681d220 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Source/MODEL/makefile +++ b/epochX/cudacpp/gg_ttgg.mad/Source/MODEL/makefile @@ -3,7 +3,7 @@ # Makefile for model library # # ---------------------------------------------------------------------------- - +# template models/template_files/makefile_madevent # Check for ../make_opts ifeq ($(wildcard ../make_opts), ../make_opts) include ../make_opts diff --git a/epochX/cudacpp/gg_ttgg.mad/Source/MODEL/makeinc.inc b/epochX/cudacpp/gg_ttgg.mad/Source/MODEL/makeinc.inc index 6e2743eac1..4a9e1b2cc5 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Source/MODEL/makeinc.inc +++ b/epochX/cudacpp/gg_ttgg.mad/Source/MODEL/makeinc.inc @@ -2,4 +2,4 @@ # written by the UFO 
converter ############################################################################# -MODEL = couplings.o lha_read.o printout.o rw_para.o model_functions.o couplings1.o couplings2.o \ No newline at end of file +MODEL = couplings.o lha_read.o printout.o rw_para.o model_functions.o couplings1.o couplings2.o couplings3.o \ No newline at end of file diff --git a/epochX/cudacpp/gg_ttgg.mad/Source/MODEL/printout.f b/epochX/cudacpp/gg_ttgg.mad/Source/MODEL/printout.f index 18b8f35b08..3e195b03a2 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Source/MODEL/printout.f +++ b/epochX/cudacpp/gg_ttgg.mad/Source/MODEL/printout.f @@ -1,3 +1,7 @@ +ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc +c written by the UFO converter +ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc + c************************************************************************ c** ** c** MadGraph/MadEvent Interface to FeynRules ** @@ -9,7 +13,7 @@ subroutine printout implicit none - include '../vector.inc' ! defines VECSIZE_MEMMAX + include '../vector.inc' ! VECSIZE_MEMMAX (needed by coupl.inc) include 'coupl.inc' ! needs VECSIZE_MEMMAX (defined in vector.inc) include 'input.inc' diff --git a/epochX/cudacpp/gg_ttgg.mad/Source/PDF/eepdf.inc b/epochX/cudacpp/gg_ttgg.mad/Source/PDF/eepdf.inc index a0183e49ee..d50d8c62b3 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Source/PDF/eepdf.inc +++ b/epochX/cudacpp/gg_ttgg.mad/Source/PDF/eepdf.inc @@ -4,6 +4,9 @@ integer n_ee parameter (n_ee = 4) ! arrays to store the components before combining them + ! note this common is very quickly overwritten do not use + ! use such common outside of auto_dsig.f double precision ee_components(n_ee) common / to_ee_components / ee_components + diff --git a/epochX/cudacpp/gg_ttgg.mad/Source/PDF/pdfwrap_emela.f b/epochX/cudacpp/gg_ttgg.mad/Source/PDF/pdfwrap_emela.f index bce10819d5..da1e4e2276 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Source/PDF/pdfwrap_emela.f +++ b/epochX/cudacpp/gg_ttgg.mad/Source/PDF/pdfwrap_emela.f @@ -13,7 +13,7 @@ SUBROUTINE PDFWRAP DOUBLE PRECISION VALUE(20) REAL*8 ALPHASPDF EXTERNAL ALPHASPDF - ! PDFs with beamstrahlung use specific initialisation/evaluation +C PDFs with beamstrahlung use specific initialisation/evaluation LOGICAL HAS_BSTRAHL COMMON /TO_HAS_BS/ HAS_BSTRAHL diff --git a/epochX/cudacpp/gg_ttgg.mad/Source/PDF/pdg2pdf.f b/epochX/cudacpp/gg_ttgg.mad/Source/PDF/pdg2pdf.f index 46f321e66b..2e7343a69b 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Source/PDF/pdg2pdf.f +++ b/epochX/cudacpp/gg_ttgg.mad/Source/PDF/pdg2pdf.f @@ -108,7 +108,7 @@ double precision function pdg2pdf(ih,ipdg,beamid,x,xmu) else if (abs(ipart).eq.10) then ipart = sign(1,ipart) * 15 endif - pdg2pdf = 0d0 + pdg2pdf = 0d0 if (beamid.lt.0) then ih_local = ipart @@ -122,7 +122,7 @@ double precision function pdg2pdf(ih,ipdg,beamid,x,xmu) endif do i_ee = 1, n_ee ee_components(i_ee) = compute_eepdf(x,omx_ee(iabs(beamid)),xmu,i_ee,ipart,ih_local) - enddo + enddo pdg2pdf = ee_components(1) ! 
temporary to test pdf load c write(*,*), x, beamid ,omx_ee(iabs(beamid)),xmu,1,ipart,ih_local,pdg2pdf return diff --git a/epochX/cudacpp/gg_ttgg.mad/Source/dsample.f b/epochX/cudacpp/gg_ttgg.mad/Source/dsample.f index 09d482b45a..b0bc0547ad 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Source/dsample.f +++ b/epochX/cudacpp/gg_ttgg.mad/Source/dsample.f @@ -204,6 +204,9 @@ subroutine sample_full(ndim,ncall,itmax,itmin,dsig,ninvar,nconfigs,VECSIZE_USED) if (VECSIZE_USED.le.1) then all_fx(1) = dsig(all_p, all_wgt,0) + ivec=0 + ilock=0 + iwarp=1 else c Here "i" is the position in the full grid of the event do i=(iwarp-1)*WARP_SIZE+1, iwarp*warp_size diff --git a/epochX/cudacpp/gg_ttgg.mad/Source/eepdf.inc b/epochX/cudacpp/gg_ttgg.mad/Source/eepdf.inc index a0183e49ee..d50d8c62b3 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Source/eepdf.inc +++ b/epochX/cudacpp/gg_ttgg.mad/Source/eepdf.inc @@ -4,6 +4,9 @@ integer n_ee parameter (n_ee = 4) ! arrays to store the components before combining them + ! note this common is very quickly overwritten do not use + ! use such common outside of auto_dsig.f double precision ee_components(n_ee) common / to_ee_components / ee_components + diff --git a/epochX/cudacpp/gg_ttgg.mad/Source/genps.inc b/epochX/cudacpp/gg_ttgg.mad/Source/genps.inc index af7e0efbce..ef78e7e812 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Source/genps.inc +++ b/epochX/cudacpp/gg_ttgg.mad/Source/genps.inc @@ -29,9 +29,8 @@ c parameter (max_host=99,maxplace=199,maxpoints=100,maxans=50) c************************************************************************* c Parameters for helicity sums in matrixN.f c************************************************************************* - REAL*8 LIMHEL -c PARAMETER(LIMHEL=1e-8) ! ME threshold for helicity filtering (Fortran default) - PARAMETER(LIMHEL=0) ! ME threshold for helicity filtering (force Fortran to mimic cudacpp, see #419) +c REAL*8 LIMHEL +c PARAMETER(LIMHEL=1e-8) -> pass in the run_card.dat INTEGER MAXTRIES PARAMETER(MAXTRIES=25) C To pass the helicity configuration chosen by the DiscreteSampler to diff --git a/epochX/cudacpp/gg_ttgg.mad/Source/run.inc b/epochX/cudacpp/gg_ttgg.mad/Source/run.inc index 5433a23583..81c8df6bf2 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Source/run.inc +++ b/epochX/cudacpp/gg_ttgg.mad/Source/run.inc @@ -106,4 +106,7 @@ c double precision tmin_for_channel integer sde_strat ! 
1 means standard single diagram enhancement strategy, c 2 means approximation by the denominator of the propagator - common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat \ No newline at end of file + common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat +c + double precision limhel + common/to_limhel/limhel diff --git a/epochX/cudacpp/gg_ttgg.mad/Source/run_card.inc b/epochX/cudacpp/gg_ttgg.mad/Source/run_card.inc index 67af0f2051..1a1bc782bd 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Source/run_card.inc +++ b/epochX/cudacpp/gg_ttgg.mad/Source/run_card.inc @@ -88,6 +88,8 @@ DSQRT_SHAT = 0.000000000000000D+00 + LIMHEL = 0.000000000000000D+00 + PTJ = 2.000000000000000D+01 PTB = 0.000000000000000D+00 diff --git a/epochX/cudacpp/gg_ttgg.mad/Source/setrun.f b/epochX/cudacpp/gg_ttgg.mad/Source/setrun.f index 9e9ef7fdbd..dc6caef748 100644 --- a/epochX/cudacpp/gg_ttgg.mad/Source/setrun.f +++ b/epochX/cudacpp/gg_ttgg.mad/Source/setrun.f @@ -162,9 +162,21 @@ subroutine setrun C Fill common block for Les Houches init info do i=1,2 if(lpp(i).eq.1.or.lpp(i).eq.2) then - idbmup(i)=2212 + if (nb_proton(i).eq.1.and.nb_neutron(i).eq.0) then + idbmup(i)=2212 + elseif (nb_proton(i).eq.0.and.nb_neutron(i).eq.1) then + idbmup(i)=2112 + else + idbmup(i) = 1000000000 + (nb_proton(i)+nb_neutron(i))*10 + $ + nb_proton(i)*10000 + endif elseif(lpp(i).eq.-1.or.lpp(i).eq.-2) then - idbmup(i)=-2212 + if (nb_proton(i).eq.1.and.nb_neutron(i).eq.0) then + idbmup(i)=-2212 + else + idbmup(i) = -1*(1000000000 + (nb_proton(i)+nb_neutron(i))*10 + $ + nb_proton(i)*10000) + endif elseif(lpp(i).eq.3) then idbmup(i)=11 elseif(lpp(i).eq.-3) then diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MGVersion.txt b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MGVersion.txt index 9d3a5c0ba0..a806d3938c 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MGVersion.txt +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/MGVersion.txt @@ -1 +1 @@ -3.5.3_lo_vect \ No newline at end of file +3.6.0_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc index 63aa2a8900..b9a359a354 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.h b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.h index a0f4d7d7b5..6d43b06ac6 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.h +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 
3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig.f index 10d3eb1082..04661b1cdf 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1134,11 +1134,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1148,32 +1149,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f index 51653e4244..52b70294cc 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -101,6 +101,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -217,7 +219,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -281,7 +283,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -320,9 +322,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -338,6 +341,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -485,11 +490,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -589,9 +589,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -752,3 +754,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/driver.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/driver.f index 526cc3b0ae..f7f23196eb 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/driver.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f index c00a648e15..66a35ffb59 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -23,6 +23,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=64) INTEGER NGRAPHS @@ -46,8 +48,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -56,26 +58,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -86,7 +85,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -96,26 +94,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,6) /-1,-1,-1, 1,-1,-1/ DATA (NHEL(I, 2),I=1,6) /-1,-1,-1, 1,-1, 1/ DATA (NHEL(I, 3),I=1,6) /-1,-1,-1, 1, 1,-1/ @@ -191,8 +188,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -201,11 +197,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=24 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=24 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -215,12 +211,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) @@ -232,7 +227,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -261,35 +257,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -361,7 +355,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cluster.f b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cluster.f index 649e46f4e9..b8995283ed 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cluster.f +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/cluster.f @@ -552,6 +552,8 @@ logical function cluster(p, ivec) if (btest(mlevel,1)) $ write (*,*)'New event' + iwin = 0 + jwin = 0 cluster=.false. clustered=.false. do i=0,3 @@ -663,7 +665,8 @@ logical function cluster(p, ivec) c initialize graph storage igraphs(0)=0 nleft=nexternal -c cluster +c cluster + if (iwin.eq.0.or.jwin.eq.0) stop 21 do n=1,nexternal-2 c combine winner imocl(n)=imap(iwin,2)+imap(jwin,2) diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/proc_characteristics b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/proc_characteristics index 61b8ce0c6c..13deb6127f 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/proc_characteristics +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/proc_characteristics @@ -17,6 +17,8 @@ splitting_types = [] perturbation_order = [] limitations = [] + ew_sudakov = False hel_recycling = False single_color = True nlo_mixed_expansion = True + gauge = unitary diff --git a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/refine.sh b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/refine.sh index afb9b99ad1..b46170ba23 100644 --- a/epochX/cudacpp/gg_ttgg.mad/SubProcesses/refine.sh +++ b/epochX/cudacpp/gg_ttgg.mad/SubProcesses/refine.sh @@ -57,7 +57,11 @@ j=%(directory)s for((try=1;try<=16;try+=1)); do if [ "$keeplog" = true ] ; then + if [[ -e ../madevent ]];then ../madevent 2>&1 >> $k &1 >> $k &1 >> log.txt &1 >> log.txt &1 >> $k \w*)>') - pat_end=re.compile('\w*)>') + pat_begin=re.compile(r'<(?P\w*)>') + pat_end=re.compile(r'\w*)>') tag_to_file={'slha':'param_card.dat', 'mgruncard':'run_card.dat', @@ -319,7 +319,7 @@ def check_pid(self, pid2label): def get_lha_strategy(self): """get the lha_strategy: how the weight have to be handle by the shower""" - if not self["init"]: + if "init" not in self or not self["init"]: raise Exception("No init block define") data = self["init"].split('\n')[0].split() @@ -537,7 +537,8 @@ def charge_card(self, tag): self.param_card = param_card_reader.ParamCard(param_card) return self.param_card elif tag == 'mgruncard': - self.run_card = RunCard(self[tag], unknown_warning=False) + with misc.TMP_variable(RunCard, 'allow_scan', True): + self.run_card = RunCard(self[tag], consistency=False, unknow_warning=False) return self.run_card elif tag 
== 'mg5proccard': proc_card = self[tag].split('\n') @@ -976,6 +977,8 @@ class ConfigFile(dict): """ a class for storing/dealing with input file. """ + allow_scan = False + def __init__(self, finput=None, **opt): """initialize a new instance. input can be an instance of MadLoopParam, a file, a path to a file, or simply Nothing""" @@ -993,6 +996,7 @@ def __init__(self, finput=None, **opt): # Initialize it with all the default value self.user_set = set() self.auto_set = set() + self.scan_set = {} #key -> type of list (int/float/bool/str/... for scan self.system_only = set() self.lower_to_case = {} self.list_parameter = {} #key -> type of list (int/float/bool/str/... @@ -1109,6 +1113,8 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): #1. check if the parameter is set to auto -> pass it to special if lower_name in self: targettype = type(dict.__getitem__(self, lower_name)) + if lower_name in self.scan_set: + targettype = self.scan_set[lower_name] if targettype != str and isinstance(value, str) and value.lower() == 'auto': self.auto_set.add(lower_name) if lower_name in self.user_set: @@ -1118,13 +1124,26 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): return elif lower_name in self.auto_set: self.auto_set.remove(lower_name) - + + + #1. check if the parameter is set to auto -> pass it to special + scan_targettype = None + if self.allow_scan and isinstance(value, str) and value.strip().startswith('scan'): + if lower_name in self.user_set: + self.user_set.remove(lower_name) + self.scan_set[lower_name] = type(self[lower_name]) + dict.__setitem__(self, lower_name, value) + #keep old value. + self.post_set(lower_name,value, change_userdefine, raiseerror) + return + elif lower_name in self.scan_set: + scan_targettype = self.scan_set[lower_name] + del self.scan_set[lower_name] + # 2. Find the type of the attribute that we want if lower_name in self.list_parameter: targettype = self.list_parameter[lower_name] - - if isinstance(value, str): # split for each comma/space value = value.strip() @@ -1248,8 +1267,11 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): if change_userdefine: self.user_set.add(lower_name) return self.post_set(lower_name, None, change_userdefine, raiseerror) - elif name in self: - targettype = type(self[name]) + elif name in self: + if scan_targettype: + targettype = targettype + else: + targettype = type(self[name]) else: logger.debug('Trying to add argument %s in %s. ' % (name, self.__class__.__name__) +\ 'This argument is not defined by default. 
Please consider adding it.') @@ -1262,7 +1284,7 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): if change_userdefine: self.user_set.add(lower_name) return self.post_set(lower_name, None, change_userdefine, raiseerror) - + value = self.format_variable(value, targettype, name=name) #check that the value is allowed: if lower_name in self.allowed_value and '*' not in self.allowed_value[lower_name]: @@ -1444,7 +1466,7 @@ def format_variable(value, targettype, name="unknown"): value =int(value[:-1]) * convert[value[-1]] elif '/' in value or '*' in value: try: - split = re.split('(\*|/)',value) + split = re.split(r'(\*|/)',value) v = float(split[0]) for i in range((len(split)//2)): if split[2*i+1] == '*': @@ -1482,7 +1504,7 @@ def format_variable(value, targettype, name="unknown"): value = float(value) except ValueError: try: - split = re.split('(\*|/)',value) + split = re.split(r'(\*|/)',value) v = float(split[0]) for i in range((len(split)//2)): if split[2*i+1] == '*': @@ -1491,7 +1513,10 @@ def format_variable(value, targettype, name="unknown"): v /= float(split[2*i+2]) except: v=0 - raise InvalidCmd("%s can not be mapped to a float" % value) + if "scan" in value: + raise InvalidCmd("%s is not supported here. Note that scan command can not be present simultaneously in the run_card and param_card." % value) + else: + raise InvalidCmd("%s can not be mapped to a float" % value) finally: value = v else: @@ -1737,10 +1762,12 @@ def default_setup(self): self.add_param('splitting_types',[], typelist=str) self.add_param('perturbation_order', [], typelist=str) self.add_param('limitations', [], typelist=str) + self.add_param('ew_sudakov', False) self.add_param('hel_recycling', False) self.add_param('single_color', True) self.add_param('nlo_mixed_expansion', True) - + self.add_param('gauge', 'U') + def read(self, finput): """Read the input file, this can be a path to a file, a file object, a str with the content of the file.""" @@ -3104,7 +3131,7 @@ def write(self, output_file, template=None, python_template=False, # do not write hidden parameter not hidden for this template # if python_template: - written = written.union(set(re.findall('\%\((\w*)\)s', open(template,'r').read(), re.M))) + written = written.union(set(re.findall(r'\%\((\w*)\)s', open(template,'r').read(), re.M))) to_write = to_write.union(set(self.hidden_param)) to_write = to_write.difference(written) @@ -3157,7 +3184,6 @@ def get_value_from_include(self, path, list_of_params, output_dir): if path does not exists return the current value in self for all parameter""" #WARNING DOES NOT HANDLE LIST/DICT so far - misc.sprint(output_dir, path) # handle case where file is missing if not os.path.exists(pjoin(output_dir,path)): misc.sprint("include file not existing", pjoin(output_dir,path)) @@ -3259,7 +3285,7 @@ def edit_dummy_fct_from_file(self, filelist, outdir): text = open(path,'r').read() #misc.sprint(text) f77_type = ['real*8', 'integer', 'double precision', 'logical'] - pattern = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ + pattern = re.compile(r'^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ % {'type':'|'.join(f77_type)}, re.I+re.M) for fct in pattern.findall(text): fsock = file_writers.FortranWriter(tmp,'w') @@ -3287,6 +3313,7 @@ def edit_dummy_fct_from_file(self, filelist, outdir): starttext = open(pjoin(outdir, path+'.orig')).read() fsock.remove_routine(starttext, to_mod[path][0]) for text in to_mod[path][1]: + text = self.retro_compatible_custom_fct(text) 
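
Note: the helper called here, retro_compatible_custom_fct, is defined in the next hunk; its job is to patch legacy user-supplied Fortran so that vector.inc is included before run.inc. A minimal, self-contained sketch of that injection idea (illustrative names, not the MG5aMC implementation):

import re

# matches Fortran include statements, like the include_pat of the next hunk
INCLUDE_PAT = re.compile(r"\s+include\s+['\"]([\w\./]*)")

def inject_vector_inc(lines):
    """Prepend include 'vector.inc' once, right before include 'run.inc'."""
    out, have_vector = [], False
    for line in lines:
        m = INCLUDE_PAT.match(line)
        if m and 'vector.inc' in m.group(1):
            have_vector = True
        elif m and 'run.inc' in m.group(1) and not have_vector:
            out.append("      include 'vector.inc'")
            have_vector = True
        out.append(line)
    return out

print('\n'.join(inject_vector_inc([
    "      subroutine user_cuts(p)",
    "      include 'run.inc'",
    "      end"])))
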
fsock.writelines(text) fsock.close() if not filecmp.cmp(pjoin(outdir, path), pjoin(outdir, path+'.tmp')): @@ -3303,7 +3330,33 @@ def edit_dummy_fct_from_file(self, filelist, outdir): files.mv(pjoin(outdir,path+'.orig'), pjoin(outdir, path)) + @staticmethod + def retro_compatible_custom_fct(lines, mode=None): + f77_type = ['real*8', 'integer', 'double precision', 'logical'] + function_pat = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ + % {'type':'|'.join(f77_type)}, re.I+re.M) + include_pat = re.compile(r"\s+include\s+[\'\"]([\w\./]*)") + + assert isinstance(lines, list) + sol = [] + + if mode is None or 'vector.inc' in mode: + search = True + for i,line in enumerate(lines[:]): + if search and re.search(include_pat, line): + name = re.findall(include_pat, line)[0] + misc.sprint('DETECTED INCLUDE', name) + if 'vector.inc' in name: + search = False + if 'run.inc' in name: + sol.append(" include 'vector.inc'") + search = False + sol.append(line) + if re.search(function_pat, line): + misc.sprint("DETECTED FCT") + search = True + return sol def guess_entry_fromname(self, name, value): """ @@ -3346,7 +3399,7 @@ def update_typelist(value, name, opts): #handle metadata opts = {} forced_opts = [] - for key,val in re.findall("\<(?P[_\-\w]+)\=(?P[^>]*)\>", str(name)): + for key,val in re.findall(r"\<(?P[_\-\w]+)\=(?P[^>]*)\>", str(name)): forced_opts.append(key) if val in ['True', 'False']: opts[key] = eval(val) @@ -3681,11 +3734,22 @@ def write_autodef(self, output_dir, output_file=None): out = ["%s\n" %l for l in out] fsock.writelines(out) - @staticmethod - def get_idbmup(lpp): + def get_idbmup(self, lpp, beam=1): """return the particle colliding pdg code""" if lpp in (1,2, -1,-2): - return math.copysign(2212, lpp) + target = 2212 + if 'nb_proton1' in self: + nbp = self['nb_proton%s' % beam] + nbn = self['nb_neutron%s' % beam] + if nbp == 1 and nbn ==0: + target = 2212 + elif nbp==0 and nbn ==1: + target = 2112 + else: + target = 1000000000 + target += 10 * (nbp+nbn) + target += 10000 * nbp + return math.copysign(target, lpp) elif lpp in (3,-3): return math.copysign(11, lpp) elif lpp in (4,-4): @@ -3701,8 +3765,8 @@ def get_banner_init_information(self): the first line of the block of the lhe file.""" output = {} - output["idbmup1"] = self.get_idbmup(self['lpp1']) - output["idbmup2"] = self.get_idbmup(self['lpp2']) + output["idbmup1"] = self.get_idbmup(self['lpp1'], beam=1) + output["idbmup2"] = self.get_idbmup(self['lpp2'], beam=2) output["ebmup1"] = self["ebeam1"] output["ebmup2"] = self["ebeam2"] output["pdfgup1"] = 0 @@ -3959,7 +4023,8 @@ def check_validity(self, card): dict.__setitem__(card, 'pdlabel1', card['pdlabel']) dict.__setitem__(card, 'pdlabel2', card['pdlabel']) - if abs(card['lpp1']) == 1 == abs(card['lpp2']) and card['pdlabel1'] != card['pdlabel2']: + if isinstance(card['lpp1'],int) and isinstance(card['lpp2'],int) and \ + abs(card['lpp1']) == 1 == abs(card['lpp2']) and card['pdlabel1'] != card['pdlabel2']: raise InvalidRunCard("Assymetric beam pdf not supported for proton-proton collision") def status(self, card): @@ -4156,12 +4221,16 @@ def default_setup(self): self.add_param('frame_id', 6, system=True) self.add_param("event_norm", "average", allowed=['sum','average', 'unity'], include=False, sys_default='sum', hidden=True) + self.add_param("keep_log", "normal", include=False, hidden=True, + comment="none: all log send to /dev/null.\n minimal: keep only log for survey of the last run.\n normal: keep only log for survey of all run. 
\n debug: keep all log (survey and refine)", + allowed=['none', 'minimal', 'normal', 'debug']) #cut self.add_param("auto_ptj_mjj", True, hidden=True) self.add_param("bwcutoff", 15.0) self.add_param("cut_decays", False, cut='d') self.add_param('dsqrt_shat',0., cut=True) self.add_param("nhel", 0, include=False) + self.add_param("limhel", 1e-8, hidden=True, comment="threshold to determine if an helicity contributes when not MC over helicity.") #pt cut self.add_param("ptj", 20.0, cut='j') self.add_param("ptb", 0.0, cut='b') @@ -4842,7 +4911,7 @@ def create_default_for_process(self, proc_characteristic, history, proc_def): # here pick strategy 2 if only one QCD color flow # and for pure multi-jet case jet_id = [21] + list(range(1, self['maxjetflavor']+1)) - if proc_characteristic['single_color']: + if proc_characteristic['gauge'] != 'FD' and proc_characteristic['single_color']: self['sde_strategy'] = 2 #for pure lepton final state go back to sde_strategy=1 pure_lepton=True @@ -5741,9 +5810,10 @@ def check_validity(self): # check that ebeam is bigger than the proton mass. for i in [1,2]: - if self['lpp%s' % i ] not in [1,2]: + # do not for proton mass if not proton PDF (or when scan initialization) + if self['lpp%s' % i ] not in [1,2] or isinstance(self['ebeam%i' % i], str): continue - + if self['ebeam%i' % i] < 0.938: if self['ebeam%i' %i] == 0: logger.warning("At-rest proton mode set: energy beam set to 0.938 GeV") @@ -6193,3 +6263,181 @@ def log_and_update(self, banner, card, par, v): xcard = banner.charge_card(card) xcard[par[0]].param_dict[(par[1],)].value = v xcard.write(os.path.join(self.me_dir, 'Cards', '%s.dat' % card)) + + + + +class RunCardIterator(object): + """A class keeping track of the scan: flag in the param_card and + having an __iter__() function to scan over all the points of the scan. + """ + + logging = True + def __init__(self, input_path=None): + with misc.TMP_variable(RunCard, 'allow_scan', True): + self.run_card = RunCard(input_path, consistency=False) + self.run_card.allow_scan = True + + self.itertag = [] #all the current value use + self.cross = [] # keep track of all the cross-section computed + self.param_order = [] + + def __iter__(self): + """generate the next param_card (in a abstract way) related to the scan. + Technically this generates only the generator.""" + + if hasattr(self, 'iterator'): + return self.iterator + self.iterator = self.iterate() + return self.iterator + + def write(self, path): + self.__iter__.write(path) + + def next(self, autostart=False): + """call the next iteration value""" + try: + iterator = self.iterator + except: + if autostart: + iterator = self.__iter__() + else: + raise + try: + out = next(iterator) + except StopIteration: + del self.iterator + raise + return out + + def iterate(self): + """create the actual generator""" + all_iterators = {} # dictionary of key -> block of object to scan [([param, [values]), ...] 
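
Note on the scan syntax handled below: a run-card value such as "scan: [10., 20.]" defines an independent scan, while values tagged with the same index, e.g. "scan1: [...]" on two parameters, are varied in lockstep. A hedged, standalone illustration of that grouping logic (toy parameter names, plain regex groups instead of the named groups used by the real pattern):

import itertools, re

pattern = re.compile(r'scan\s*(\d*)\s*:\s*([^#]*)', re.I)

cards = {'ebeam1': 'scan1: [3000., 6500.]',
         'ebeam2': 'scan1: [3000., 6500.]',
         'ptj':    'scan: [10., 20., 30.]'}

groups = {}
for name, value in cards.items():
    key, def_list = pattern.findall(value)[0]
    key = key or '-%d' % len(groups)      # unnamed scans vary independently
    groups.setdefault(key, []).append((name, eval(def_list)))

lengths = [range(len(block[0][1])) for block in groups.values()]
for positions in itertools.product(*lengths):
    point = {name: values[pos]
             for pos, block in zip(positions, groups.values())
             for name, values in block}
    print(point)       # one run card per scan point, six in total here
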
+ pattern = re.compile(r'''scan\s*(?P\d*)\s*:\s*(?P[^#]*)''', re.I) + + # fill all_iterators with the run_card information + for name in self.run_card.scan_set: + value = self.run_card[name] + try: + key, def_list = pattern.findall(value)[0] + except Exception as error: + misc.sprint(error) + raise Exception("Fail to handle scanning tag in run_card: Please check that the syntax is valid") + if key == '': + key = -1 * len(all_iterators) + if key not in all_iterators: + all_iterators[key] = [] + try: + all_iterators[key].append( (name, eval(def_list))) + except SyntaxError as error: + raise Exception("Fail to handle your scan definition. Please check your syntax:\n entry: %s \n Error reported: %s" %(def_list, error)) + + #prepare to keep track of parameter changing for the report + keys = list(all_iterators.keys()) # need to fix an order for the scan + #store the type of parameter + for key in keys: + for param, values in all_iterators[key]: + self.param_order.append("run_card#%s" % (param)) + + # do the loop + lengths = [list(range(len(all_iterators[key][0][1]))) for key in keys] + from functools import reduce + total = reduce((lambda x, y: x * y),[len(x) for x in lengths]) + for i,positions in enumerate(itertools.product(*lengths)): + self.itertag = [] + if self.logging: + logger.info("Create the next run_card in the scan definition (%s/%s) " %( i+1, total), '$MG:BOLD') + for i, pos in enumerate(positions): + key = keys[i] + for param, values in all_iterators[key]: + # assign the value in the card. + self.run_card[param] = values[pos] + self.itertag.append(values[pos]) + if self.logging: + logger.info("change parameter %s to %s", \ + param, values[pos]) + + + # retrun the current param_card up to next iteration + yield self.run_card + + + def store_entry(self, run_name, cross, error=None, run_card_path=None): + """store the value of the cross-section""" + + if isinstance(cross, dict): + info = dict(cross) + info.update({'bench' : self.itertag, 'run_name': run_name}) + self.cross.append(info) + else: + if error is None: + self.cross.append({'bench' : self.itertag, 'run_name': run_name, 'cross(pb)':cross}) + else: + self.cross.append({'bench' : self.itertag, 'run_name': run_name, 'cross(pb)':cross, 'error(pb)':error}) + + + def write_summary(self, path, order=None, lastline=False, nbcol=20): + """ """ + + if path: + ff = open(path, 'w') + path_events = path.rsplit("/", 1)[0] + #identCard = open(pjoin(path.rsplit("/", 2)[0], "Cards", "ident_card.dat")) + #identLines = identCard.readlines() + #identCard.close() + else: + ff = StringIO.StringIO() + if order: + keys = order + else: + keys = list(self.cross[0].keys()) + if 'bench' in keys: keys.remove('bench') + if 'run_name' in keys: keys.remove('run_name') + keys.sort() + if 'cross(pb)' in keys: + keys.remove('cross(pb)') + keys.append('cross(pb)') + if 'error(pb)' in keys: + keys.remove('error(pb)') + keys.append('error(pb)') + + formatting = "#%s%s%s\n" %('%%-%is ' % (nbcol-1), ('%%-%is ' % (nbcol))* len(self.param_order), + ('%%-%is ' % (nbcol))* len(keys)) + # header + if not lastline: + ff.write(formatting % tuple(['run_name'] + self.param_order + keys)) + formatting = "%s%s%s\n" %('%%-%is ' % (nbcol), ('%%-%ie ' % (nbcol))* len(self.param_order), + ('%%-%ie ' % (nbcol))* len(keys)) + + if not lastline: + to_print = self.cross + else: + to_print = self.cross[-1:] + for info in to_print: + name = info['run_name'] + bench = info['bench'] + data = [] + for k in keys: + if k in info: + data.append(info[k]) + else: + data.append(0.) 
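
Aside on the formatting templates built above and filled in just below: they rely on nested %-formatting, where '%%-%is ' % 20 first evaluates to the column template '%-20s ', which is then filled per row. A small self-contained illustration with hypothetical column names:

nbcol = 20
header_fmt = '#' + ('%%-%is ' % (nbcol - 1)) + ('%%-%is ' % nbcol) * 2
row_fmt = ('%%-%is ' % nbcol) + ('%%-%ie ' % nbcol) * 2
print(header_fmt % ('run_name', 'run_card#ebeam1', 'cross(pb)'))
print(row_fmt % ('run_01', 3000.0, 1.234e-02))
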
+ ff.write(formatting % tuple([name] + bench + data)) + ff_single = open(pjoin(path_events, name, "params.dat"), "w") + for i_bench in range(0, len(bench)): + ff_single.write(self.param_order[i_bench] + " = " + str(bench[i_bench]) +"\n") + ff_single.close() + + if not path: + return ff.getvalue() + + + def get_next_name(self, run_name): + """returns a smart name for the next run""" + + if '_' in run_name: + name, value = run_name.rsplit('_',1) + if value.isdigit(): + return '%s_%02i' % (name, float(value)+1) + # no valid '_' in the name + return '%s_scan_02' % run_name diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/check_param_card.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/check_param_card.py index 71089d7480..bc785b5de6 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/check_param_card.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/check_param_card.py @@ -649,7 +649,7 @@ def write_inc_file(self, outpath, identpath, default, need_mp=False): #check if we need to write the value of scale for some block if os.path.exists(input_inc): text = open(input_inc).read() - scales = list(set(re.findall('mdl__(\w*)__scale', text, re.I))) + scales = list(set(re.findall(r'mdl__(\w*)__scale', text, re.I))) else: scales = [] @@ -1000,10 +1000,12 @@ def iterate(self): self.param_order.append("%s#%s" % (param.lhablock, '_'.join(repr(i) for i in param.lhacode))) # do the loop lengths = [list(range(len(all_iterators[key][0][1]))) for key in keys] - for positions in itertools.product(*lengths): + from functools import reduce + total = reduce((lambda x, y: x * y),[len(x) for x in lengths]) + for i,positions in enumerate(itertools.product(*lengths)): self.itertag = [] if self.logging: - logger.info("Create the next param_card in the scan definition", '$MG:BOLD') + logger.info("Create the next param_card in the scan definition (%s/%s)" % (i+1,total), '$MG:BOLD') for i, pos in enumerate(positions): key = keys[i] for param, values in all_iterators[key]: diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/cluster.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/cluster.py index 9a893f630d..1ad860e04f 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/cluster.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/cluster.py @@ -646,7 +646,10 @@ def worker(self): if os.path.exists(exe) and not exe.startswith('/'): exe = './' + exe if isinstance(opt['stdout'],str): - opt['stdout'] = open(opt['stdout'],'w') + if opt['stdout'] == '/dev/null': + opt['stdout'] = os.open(os.devnull, os.O_RDWR) + else: + opt['stdout'] = open(opt['stdout'],'w') if opt['stderr'] == None: opt['stderr'] = subprocess.STDOUT if arg: @@ -671,11 +674,12 @@ def worker(self): self.pids.put(pid) # the function should return 0 if everything is fine # the error message otherwise - returncode = exe(*arg, **opt) - if returncode != 0: - logger.warning("fct %s does not return 0. Stopping the code in a clean way. The error was:\n%s", exe, returncode) + try: + returncode = exe(*arg, **opt) + except Exception as error: + #logger.warning("fct %s does not return 0. Stopping the code in a clean way. 
The error was:\n%s", exe, returncode) self.stoprequest.set() - self.remove("fct %s does not return 0:\n %s" % (exe, returncode)) + self.remove("fct %s does raise %s\n %s" % (exe, error)) except Exception as error: self.fail_msg = sys.exc_info() logger.warning(str(error)) @@ -700,7 +704,7 @@ def worker(self): def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, - log=None, required_output=[], nb_submit=0): + log=None, required_output=[], nb_submit=0, python_opts={}): """submit a job on multicore machine""" # open threads if needed @@ -720,7 +724,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, return tag else: # python function - self.queue.put((tag, prog, argument, {})) + self.queue.put((tag, prog, argument, python_opts)) self.submitted.put(1) return tag @@ -908,6 +912,10 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None else: requirement = '' + if 'cluster_walltime' in self.options and self.options['cluster_walltime']\ + and self.options['cluster_walltime'] != 'None': + requirement+='\n MaxRuntime = %s' % self.options['cluster_walltime'] + if cwd is None: cwd = os.getcwd() if stdout is None: @@ -936,7 +944,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None #Submitting job(s). #Logging submit event(s). #1 job(s) submitted to cluster 2253622. - pat = re.compile("submitted to cluster (\d*)",re.MULTILINE) + pat = re.compile(r"submitted to cluster (\d*)",re.MULTILINE) output = output.decode(errors='ignore') try: id = pat.search(output).groups()[0] @@ -1025,7 +1033,7 @@ def submit2(self, prog, argument=[], cwd=None, stdout=None, stderr=None, #Logging submit event(s). #1 job(s) submitted to cluster 2253622. output = output.decode(errors='ignore') - pat = re.compile("submitted to cluster (\d*)",re.MULTILINE) + pat = re.compile(r"submitted to cluster (\d*)",re.MULTILINE) try: id = pat.search(output).groups()[0] except: @@ -1588,7 +1596,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None output = a.communicate()[0].decode(errors='ignore') #Your job 874511 ("test.sh") has been submitted - pat = re.compile("Your job (\d*) \(",re.MULTILINE) + pat = re.compile(r"Your job (\d*) \(",re.MULTILINE) try: id = pat.search(output).groups()[0] except: @@ -1606,7 +1614,7 @@ def control_one_job(self, id): if not status: return 'F' #874516 0.00000 test.sh alwall qw 03/04/2012 22:30:35 1 - pat = re.compile("^(\d+)\s+[\d\.]+\s+[\w\d\.]+\s+[\w\d\.]+\s+(\w+)\s") + pat = re.compile(r"^(\d+)\s+[\d\.]+\s+[\w\d\.]+\s+[\w\d\.]+\s+(\w+)\s") stat = '' for line in status.stdout.read().decode(errors='ignore').split('\n'): if not line: @@ -1636,7 +1644,7 @@ def control(self, me_dir=None): cmd = 'qstat -s %s' % statusflag status = misc.Popen([cmd], shell=True, stdout=subprocess.PIPE) #874516 0.00000 test.sh alwall qw 03/04/2012 22:30:35 1 - pat = re.compile("^(\d+)") + pat = re.compile(r"^(\d+)") for line in status.stdout.read().decode(errors='ignore').split('\n'): line = line.strip() try: @@ -1715,6 +1723,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None stderr = stdout if log is None: log = '/dev/null' + command = ['sbatch', '-o', stdout, '-J', me_dir, @@ -1726,6 +1735,12 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None command.insert(1, '-p') command.insert(2, self.cluster_queue) + if 'cluster_walltime' in self.options and self.options['cluster_walltime']\ + and self.options['cluster_walltime'] != 'None': + 
command.insert(1, '-t') + command.insert(2, self.options['cluster_walltime']) + + a = misc.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, @@ -1736,7 +1751,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None id = output_arr[3].rstrip() if not id.isdigit(): - id = re.findall('Submitted batch job ([\d\.]+)', ' '.join(output_arr)) + id = re.findall(r'Submitted batch job ([\d\.]+)', ' '.join(output_arr)) if not id or len(id)>1: raise ClusterManagmentError( 'fail to submit to the cluster: \n%s' \ diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/combine_runs.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/combine_runs.py index 4de6b84ec0..b1e8c88eac 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/combine_runs.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/combine_runs.py @@ -20,6 +20,7 @@ from __future__ import absolute_import import math import os +import shutil import re import logging from six.moves import range @@ -117,6 +118,7 @@ def sum_multichannel(self, channel): #Now read in all of the events and write them #back out with the appropriate scaled weight + to_clean = [] fsock = open(pjoin(channel, 'events.lhe'), 'w') wgt = results.axsec / results.nunwgt tot_nevents, nb_file = 0, 0 @@ -129,8 +131,14 @@ def sum_multichannel(self, channel): nw = self.copy_events(fsock, pjoin(path,'events.lhe'), wgt) tot_nevents += nw nb_file += 1 + to_clean.append(path) logger.debug("Combined %s file generating %s events for %s " , nb_file, tot_nevents, channel) - + for path in to_clean: + try: + shutil.rmtree(path) + except Exception as error: + pass + @staticmethod def get_fortran_str(nb): data = '%E' % nb @@ -162,6 +170,7 @@ def copy_events(self, fsock, input, new_wgt): fsock.write(line) old_line = line return nb_evt + def get_channels(self, proc_path): """Opens file symfact.dat to determine all channels""" sympath = os.path.join(proc_path, 'symfact.dat') diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/common_run_interface.py index 9bd9d9cb50..194f0cdfbd 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/common_run_interface.py @@ -23,6 +23,7 @@ import ast import logging import math +import copy import os import re import shutil @@ -181,6 +182,23 @@ def help_add_time_of_flight(self): logger.info(' threshold option allows to change the minimal value required to') logger.info(' a non zero value for the particle (default:1e-12s)') + def help_print_results(self): + logger.info("syntax: print_results [RUN_NAME] [OPTIONS]") + logger.info("-- print the results of the previous run on the screen") + logger.info(" If not RUN_NAME is provided, the information of all run") + logger.info(" are printed one after another.") + logger.info("") + logger.info(" supported options:") + logger.info(" ------------------") + logger.info(" --format=full|short # default is full") + logger.info(" full format contains banner/... 
") + logger.info(" while short is a simple multi-column format (nice for plotting)") + logger.info(" --path=") + logger.info(" allow to write the information to a file.") + logger.info(" --mode=w|a #default is w ") + logger.info(" when the information is printed to a file, you can choose ") + logger.info(" to either overwrite the file if already exists (w mode)") + logger.info(" to append the information at the end of the file (a mode)") class CheckValidForCmd(object): @@ -727,7 +745,7 @@ def __init__(self, me_dir, options, *args, **opts): if not self.proc_characteristics['ninitial']: # Get number of initial states nexternal = open(pjoin(self.me_dir,'Source','nexternal.inc')).read() - found = re.search("PARAMETER\s*\(NINCOMING=(\d)\)", nexternal) + found = re.search(r"PARAMETER\s*\(NINCOMING=(\d)\)", nexternal) self.ninitial = int(found.group(1)) else: self.ninitial = self.proc_characteristics['ninitial'] @@ -989,7 +1007,22 @@ def do_treatcards(self, line, amcatnlo=False): #raise Exception, "%s %s %s" % (sys.path, os.path.exists(pjoin(self.me_dir,'bin','internal', 'ufomodel')), os.listdir(pjoin(self.me_dir,'bin','internal', 'ufomodel'))) import ufomodel as ufomodel zero = ufomodel.parameters.ZERO - if self.proc_characteristics['nlo_mixed_expansion']: + no_width = [] + + if self.proc_characteristics['ew_sudakov']: + # if the sudakov approximation is used, force all particle widths to zero + # unless the complex mass scheme is used + if not self.proc_characteristics['complex_mass_scheme']: + no_width = [p for p in ufomodel.all_particles if p.width != zero] + logger.info('''Setting all particle widths to zero (needed for EW Sudakov approximation).''','$MG:BOLD') + # also, check that the model features the 'ntadpole' parameter, and set it to 1 + try: + param_card['tadpole'].get(1).value = 1. + logger.info('''Setting the value of ntadpole to 1 (needed for EW Sudakov approximation).''','$MG:BOLD') + except KeyError: + logger.warning('''The model has no 'ntadpole' parameter. 
The Sudakov approximation for EW corrections may give wrong results.''') + + elif self.proc_characteristics['nlo_mixed_expansion']: no_width = [p for p in ufomodel.all_particles if (str(p.pdg_code) in pids or str(-p.pdg_code) in pids) and p.width != zero] @@ -1168,17 +1201,17 @@ def detect_card_type(path): 'Begin Minpts', 'gridpack', 'ebeam1', - 'block\s+mw_run', + r'block\s+mw_run', 'BLOCK', 'DECAY', 'launch', 'madspin', - 'transfer_card\.dat', + r'transfer_card\.dat', 'set', 'main:numberofevents', # pythia8, '@MG5aMC skip_analysis', #MA5 --both-- - '@MG5aMC\s*inputs\s*=\s*\*\.(?:hepmc|lhe)', #MA5 --both-- - '@MG5aMC\s*reconstruction_name', # MA5 hadronique + r'@MG5aMC\s*inputs\s*=\s*\*\.(?:hepmc|lhe)', #MA5 --both-- + r'@MG5aMC\s*reconstruction_name', # MA5 hadronique '@MG5aMC', # MA5 hadronique 'run_rivet_later', # Rivet ] @@ -1237,7 +1270,7 @@ def detect_card_type(path): return 'madspin_card.dat' if 'decay' in text: # need to check if this a line like "decay w+" or "set decay" - if re.search("(^|;)\s*decay", fulltext, re.M): + if re.search(r"(^|;)\s*decay", fulltext, re.M): return 'madspin_card.dat' else: return 'reweight_card.dat' @@ -2074,6 +2107,12 @@ def check_multicore(self): # ensure that the run_card is present if not hasattr(self, 'run_card'): self.run_card = banner_mod.RunCard(pjoin(self.me_dir, 'Cards', 'run_card.dat')) + # Below reads the run_card in the LHE file rather than the Cards/run_card + import madgraph.various.lhe_parser as lhe_parser + args_path = list(args) + self.check_decay_events(args_path) + self.run_card = banner_mod.RunCard(lhe_parser.EventFile(args_path[0]).get_banner()['mgruncard']) + # we want to run this in a separate shell to avoid hard f2py crash command = [sys.executable] @@ -2085,6 +2124,12 @@ def check_multicore(self): command.append('--web') command.append('reweight') + ## TV: copy the event file as backup before starting reweighting + event_path = pjoin(self.me_dir, 'Events', self.run_name, 'events.lhe.gz') + event_path_backup = pjoin(self.me_dir, 'Events', self.run_name, 'events_orig.lhe.gz') + if os.path.exists(event_path) and not os.path.exists(event_path_backup): + shutil.copyfile(event_path, event_path_backup) + ######### START SINGLE CORE MODE ############ if self.options['nb_core']==1 or self.run_card['nevents'] < 101 or not check_multicore(self): if self.run_name: @@ -2200,7 +2245,7 @@ def check_multicore(self): cross_sections[key] = value / (nb_event+1) lhe.remove() for key in cross_sections: - if key == 'orig' or key.isdigit(): + if key == 'orig' or (key.isdigit() and not (key[0] == '2')): continue logger.info('%s : %s pb' % (key, cross_sections[key])) return @@ -2461,7 +2506,7 @@ def do_add_time_of_flight(self, line): ############################################################################ def do_print_results(self, line): - """Not in help:Print the cross-section/ number of events for a given run""" + """Print the cross-section/ number of events for a given run""" args = self.split_arg(line) options={'path':None, 'mode':'w', 'format':'full'} @@ -2942,10 +2987,15 @@ def do_rivet(self, line, postprocess=False): #2 Prepare Rivet setup environments rivet_path = self.options['rivet_path'] yoda_path = self.options['yoda_path'] + fastjet_path = subprocess.Popen([self.options['fastjet'], '--prefix'], + stdout = subprocess.PIPE).stdout.read().decode(errors='ignore').strip() + set_env = set_env + "export PATH={0}:$PATH\n".format(pjoin(rivet_path, 'bin')) set_env = set_env + "export PATH={0}:$PATH\n".format(pjoin(yoda_path, 'bin')) set_env = 
set_env + "export LD_LIBRARY_PATH={0}:{1}:$LD_LIBRARY_PATH\n".format(pjoin(rivet_path, 'lib'), pjoin(rivet_path, 'lib64')) set_env = set_env + "export LD_LIBRARY_PATH={0}:{1}:$LD_LIBRARY_PATH\n".format(pjoin(yoda_path, 'lib'), pjoin(yoda_path, 'lib64')) + set_env = set_env + "export LD_LIBRARY_PATH={0}:$LD_LIBRARY_PATH\n".format(pjoin(self.options['hepmc_path'], 'lib')) + set_env = set_env + "export LD_LIBRARY_PATH={0}:$LD_LIBRARY_PATH\n".format(pjoin(fastjet_path, 'lib')) major, minor = sys.version_info[0:2] set_env = set_env + "export PYTHONPATH={0}:{1}:$PYTHONPATH\n".format(pjoin(rivet_path, 'lib', 'python%s.%s' %(major,minor), 'site-packages'),\ pjoin(rivet_path, 'lib64', 'python%s.%s' %(major,minor), 'site-packages')) @@ -4311,8 +4361,8 @@ def complete_compute_widths(self, text, line, begidx, endidx, formatting=True): else: completion = {} completion['options'] = self.list_completion(text, - ['--path=', '--output=', '--min_br=0.\$', '--nlo', - '--precision_channel=0.\$', '--body_decay=']) + ['--path=', '--output=', r'--min_br=0.\$', '--nlo', + r'--precision_channel=0.\$', '--body_decay=']) return self.deal_multiple_categories(completion, formatting) @@ -4821,9 +4871,9 @@ class AskforEditCard(cmd.OneLinePathCompletion): (return False to repeat the question) """ - all_card_name = ['param_card', 'run_card', 'pythia_card', 'pythia8_card', + all_card_name = ['param_card', 'run_card', 'pythia_card', 'pythia8_card', 'fo_analysis_card' 'madweight_card', 'MadLoopParams', 'shower_card', 'rivet_card'] - to_init_card = ['param', 'run', 'madweight', 'madloop', + to_init_card = ['param', 'run', 'madweight', 'madloop', 'fo_analysis', 'shower', 'pythia8','delphes','madspin', 'rivet'] special_shortcut = {} special_shortcut_help = {} @@ -4853,6 +4903,7 @@ def load_default(self): self.has_PY8 = False self.has_delphes = False self.has_rivet = False + self.has_fo_card = False self.paths = {} self.update_block = [] @@ -5058,7 +5109,8 @@ def init_run(self, cards): try: - self.run_card = banner_mod.RunCard(self.paths['run'], consistency='warning') + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + self.run_card = banner_mod.RunCard(self.paths['run'], consistency='warning') except IOError: self.run_card = {} try: @@ -5248,6 +5300,15 @@ def init_delphes(self, cards): self.has_delphes = True return [] + def init_fo_analysis(self, cards): + self.has_fo_card = False + if not self.get_path('FO_analyse', cards): + return [] + self.has_fo_card = True + self.fo_card = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse']) + self.fo_card_def = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse_default']) + return list(self.fo_card.string_vars) + def set_CM_velocity(self, line): """compute sqrts from the velocity in the center of mass frame""" @@ -5508,6 +5569,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed['delphes_card'] = '' if self.has_rivet: allowed['rivet_card'] = '' + if self.has_fo_card: + allowed['fo_card'] = '' elif len(args) == 2: if args[1] == 'run_card': @@ -5532,6 +5595,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed = {'delphes_card':'default'} elif args[1] == 'rivet_card': allowed = {'rivet_card':'default'} + elif args[1] == 'fo_card': + allowed = {'fo_card':'default'} else: allowed = {'value':''} @@ -5539,6 +5604,7 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): start = 1 if args[1] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', 
'MadLoop_card','pythia8_card','delphes_card','plot_card', + 'fo_card', 'madanalysis5_parton_card','madanalysis5_hadron_card', 'rivet_card']: start = 2 @@ -5576,6 +5642,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): categories.append('delphes_card') if self.has_rivet: categories.append('rivet_card') + if self.has_fo_card: + categories.append('fo_card') possibilities['category of parameter (optional)'] = \ self.list_completion(text, categories) @@ -5630,7 +5698,13 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): if 'delphes_card' in allowed: if allowed['delphes_card'] == 'default': opts = ['default', 'atlas', 'cms'] - possibilities['Delphes Card'] = self.list_completion(text, opts) + possibilities['Delphes Card'] = self.list_completion(text, opts) + + if 'fo_card' in allowed: + opts = self.fo_card.string_vars + if allowed['fo_card'] == 'default': + opts.append('default') + possibilities['FO Card'] = self.list_completion(text, opts) if 'value' in list(allowed.keys()): opts = ['default', 'scale'] @@ -5733,21 +5807,28 @@ def do_set(self, line): if args[0] in self.special_shortcut: targettypes , cmd = self.special_shortcut[args[0]] if len(args) != len(targettypes) +1: - logger.warning('shortcut %s requires %s argument' % (args[0], len(targettypes))) - if len(args) < len(targettypes) +1: - return + if len(targettypes) == 1 and args[len(targettypes)].startswith('scan'): + args = args[:len(targettypes)] + [' '.join(args[len(targettypes):])] + targettypes = [str] else: - logger.warning('additional argument will be ignored') + logger.warning('shortcut %s requires %s argument' % (args[0], len(targettypes))) + if len(args) < len(targettypes) +1: + return + else: + logger.warning('additional argument will be ignored') values ={} for i, argtype in enumerate(targettypes): try: - values = {str(i): banner_mod.ConfigFile.format_variable(args[i+1], argtype, args[0])} + values[str(i)] = banner_mod.ConfigFile.format_variable(args[i+1], argtype, args[0]) except ValueError as e: logger.warning("Wrong argument: The entry #%s should be of type %s.", i+1, argtype) return except InvalidCmd as e: - logger.warning(str(e)) - return + if isinstance(args[i+1], str) and args[i+1].startswith('scan'): + values[str(i)] = args[i+1] + else: + logger.warning(str(e)) + return #else: # logger.warning("too many argument for this command") # return @@ -5787,7 +5868,7 @@ def do_set(self, line): if os.path.exists(pythia_path): logger.info('add line QCUT = %s in pythia_card.dat' % args[1]) p_card = open(pythia_path,'r').read() - p_card, n = re.subn('''^\s*QCUT\s*=\s*[\de\+\-\.]*\s*$''', + p_card, n = re.subn(r'''^\s*QCUT\s*=\s*[\de\+\-\.]*\s*$''', ''' QCUT = %s ''' % args[1], \ p_card, flags=(re.M+re.I)) if n==0: @@ -5801,7 +5882,7 @@ def do_set(self, line): if os.path.exists(pythia_path): logger.info('add line SHOWERKT = %s in pythia_card.dat' % args[1].upper()) p_card = open(pythia_path,'r').read() - p_card, n = re.subn('''^\s*SHOWERKT\s*=\s*[default\de\+\-\.]*\s*$''', + p_card, n = re.subn(r'''^\s*SHOWERKT\s*=\s*[default\de\+\-\.]*\s*$''', ''' SHOWERKT = %s ''' % args[1].upper(), \ p_card, flags=(re.M+re.I)) if n==0: @@ -5856,7 +5937,7 @@ def do_set(self, line): pjoin(self.me_dir,'Cards', 'delphes_card.dat')) return - if args[0] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', + if args[0] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', 'fo_card', 'delphes_card','madanalysis5_hadron_card','madanalysis5_parton_card','rivet_card']: if args[1] == 'default': 
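
Note: several hunks in this patch wrap RunCard construction in "with misc.TMP_variable(RunCard, 'allow_scan', True):". A hedged reimplementation of that context-manager pattern, showing the intended semantics (temporarily override an attribute, restore it on exit even on exceptions); this is an illustration, not the actual madgraph.various.misc code:

from contextlib import contextmanager

@contextmanager
def tmp_variable(obj, name, value):
    sentinel = object()
    old = getattr(obj, name, sentinel)
    setattr(obj, name, value)
    try:
        yield obj
    finally:
        if old is sentinel:
            delattr(obj, name)          # attribute did not exist before
        else:
            setattr(obj, name, old)     # restore the previous value

class RunCardDemo:
    allow_scan = False                  # class-level default, as in ConfigFile

with tmp_variable(RunCardDemo, 'allow_scan', True):
    assert RunCardDemo.allow_scan      # scan values accepted inside the block
assert not RunCardDemo.allow_scan      # default restored afterwards
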
@@ -6176,6 +6257,22 @@ def do_set(self, line): self.setRivet(args[start], value, default=default) self.rivet_card.write(self.paths['rivet'], self.paths['rivet_default']) + elif self.has_fo_card and (card in ['', 'fo_card'])\ + and args[start].lower() in [k.lower() for k in self.fo_card.string_vars]: + + if args[start] in self.conflict and card == '': + text = 'ambiguous name (present in more than one card). Please specify which card to edit' + logger.warning(text) + return + if args[start+1] == 'default': + value = self.fo_card_default[args[start]] + default = True + else: + value = args[start+1] + default = False + self.fo_card[args[start]] = value + self.modified_card.add('fo_card') + #INVALID -------------------------------------------------------------- else: logger.warning('invalid set command %s ' % line) @@ -6222,12 +6319,13 @@ def setM(self, block, name, value): def setR(self, name, value): - if self.mother_interface.inputfile: - self.run_card.set(name, value, user=True, raiseerror=True) - else: - self.run_card.set(name, value, user=True) - new_value = self.run_card.get(name) - logger.info('modify parameter %s of the run_card.dat to %s' % (name, new_value),'$MG:BOLD') + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + if self.mother_interface.inputfile: + self.run_card.set(name, value, user=True, raiseerror=True) + else: + self.run_card.set(name, value, user=True) + new_value = self.run_card.get(name) + logger.info('modify parameter %s of the run_card.dat to %s' % (name, new_value),'$MG:BOLD') def setML(self, name, value, default=False): @@ -6314,6 +6412,7 @@ def check_card_consistency(self): proc_charac = self.mother_interface.proc_characteristics if proc_charac['grouped_matrix'] and \ + isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int) and \ abs(self.run_card['lpp1']) == 1 == abs(self.run_card['lpp2']) and \ (self.run_card['nb_proton1'] != self.run_card['nb_proton2'] or self.run_card['nb_neutron1'] != self.run_card['nb_neutron2'] or @@ -6403,41 +6502,42 @@ def check_card_consistency(self): # check that only quark/gluon/photon are in initial beam if lpp=+-1 pdg_in_p = list(range(-6,7))+[21,22] - if (abs(self.run_card['lpp1'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial1'])) \ + if isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int): + if(abs(self.run_card['lpp1'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial1'])) \ or (abs(self.run_card['lpp2'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial2'])): - if 'pythia_card.dat' in self.cards or 'pythia8_card.dat' in self.cards: - path_to_remove = None - if 'pythia_card.dat' in self.cards: - path_to_remove = self.paths['pythia'] - card_to_remove = 'pythia_card.dat' - elif 'pythia8_card.dat' in self.cards: - path_to_remove = self.paths['pythia8'] - card_to_remove = 'pythia8_card.dat' - if path_to_remove: - if 'partonshower' in self.run_card['bypass_check']: + if 'pythia_card.dat' in self.cards or 'pythia8_card.dat' in self.cards: + path_to_remove = None + if 'pythia_card.dat' in self.cards: + path_to_remove = self.paths['pythia'] + card_to_remove = 'pythia_card.dat' + elif 'pythia8_card.dat' in self.cards: + path_to_remove = self.paths['pythia8'] + card_to_remove = 'pythia8_card.dat' + if path_to_remove: + if 'partonshower' in self.run_card['bypass_check']: + logger.warning("forcing to keep parton-shower run while possibly not fully consistent... 
please be carefull") + else: + logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.') + os.remove(path_to_remove) + self.cards.remove(card_to_remove) + else: + logger.info('Remember that Parton-Shower are not yet ready for such proton component definition (HW implementation in progress).', '$MG:BOLD' ) + elif (abs(self.run_card['lpp1'])==3 and abs(self.run_card['lpp2'])==3): + if 'pythia8_card.dat' in self.cards: + if self.run_card['pdlabel'] == 'isronlyll': + if 'partonshower' not in self.run_card['bypass_check']: + # force that QED shower is on? + for param in ['TimeShower:QEDshowerByQ', 'TimeShower:QEDshowerByL', 'TimeShower:QEDshowerByGamma', 'SpaceShower:QEDshowerByQ', 'SpaceShower:QEDshowerByL']: + if param not in self.PY8Card or \ + (not self.PY8Card[param] and param.lower() not in self.PY8Card.user_set): + logger.warning('Activating QED shower: setting %s to True', param) + self.PY8Card[param] = True + elif 'partonshower' in self.run_card['bypass_check']: logger.warning("forcing to keep parton-shower run while possibly not fully consistent... please be carefull") - else: + else: logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.') - os.remove(path_to_remove) - self.cards.remove(card_to_remove) - else: - logger.info('Remember that Parton-Shower are not yet ready for such proton component definition (HW implementation in progress).', '$MG:BOLD' ) - elif (abs(self.run_card['lpp1'])==3 and abs(self.run_card['lpp2'])==3): - if 'pythia8_card.dat' in self.cards: - if self.run_card['pdlabel'] == 'isronlyll': - if 'partonshower' not in self.run_card['bypass_check']: - # force that QED shower is on? - for param in ['TimeShower:QEDshowerByQ', 'TimeShower:QEDshowerByL', 'TimeShower:QEDshowerByGamma', 'SpaceShower:QEDshowerByQ', 'SpaceShower:QEDshowerByL']: - if param not in self.PY8Card or \ - (not self.PY8Card[param] and param.lower() not in self.PY8Card.user_set): - logger.warning('Activating QED shower: setting %s to True', param) - self.PY8Card[param] = True - elif 'partonshower' in self.run_card['bypass_check']: - logger.warning("forcing to keep parton-shower run while possibly not fully consistent... please be carefull") - else: - logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.') - os.remove(self.paths['pythia8']) - self.cards.remove('pythia8_card.dat') + os.remove(self.paths['pythia8']) + self.cards.remove('pythia8_card.dat') ######################################################################## @@ -6514,7 +6614,8 @@ def check_card_consistency(self): #check relation between lepton PDF // dressed lepton collisions // ... 
- if abs(self.run_card['lpp1']) != 1 or abs(self.run_card['lpp2']) != 1: + if isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int) and \ + abs(self.run_card['lpp1']) != 1 or abs(self.run_card['lpp2']) != 1: if abs(self.run_card['lpp1']) == abs(self.run_card['lpp2']) == 3: # this can be dressed lepton or photon-flux if proc_charac['pdg_initial1'] in [[11],[-11]] and proc_charac['pdg_initial2'] in [[11],[-11]]: @@ -6732,7 +6833,11 @@ def write_card_param(self): """ write the param_card """ self.param_card.write(self.paths['param']) - + + def write_card_fo_card(self): + """ write the fo_card""" + self.fo_card.write_card_from_template(self.paths['FO_analyse'], self.paths['FO_analyse_default']) + @staticmethod def update_dependent(mecmd, me_dir, param_card, path ,timer=0, run_card=None, lhapdfconfig=None): @@ -7076,7 +7181,7 @@ def do_decay(self, line): #first find the particle particle = line.split('>')[0].strip() logger.info("change madspin_card to define the decay of %s: %s" %(particle, line.strip()), '$MG:BOLD') - particle = particle.replace('+','\+').replace('-','\-') + particle = particle.replace('+',r'\+').replace('-',r'\-') decay_pattern = re.compile(r"^\s*decay\s+%s\s*>[\s\w+-~]*?$" % particle, re.I+re.M) text= open(path).read() text = decay_pattern.sub('', text) @@ -7193,7 +7298,7 @@ def help_edit(self, prefix=True): logger.info( ' --clean remove all previously existing line in the file') logger.info( ' --comment_line="" comment all lines matching the regular expression') logger.info('') - logger.info(' Note: all regular-expression will be prefixed by ^\s*') + logger.info(r' Note: all regular-expression will be prefixed by ^\s*') logger.info('') logger.info( ' example: edit reweight --after_line="change mode\b" change model heft') logger.info( ' edit madspin --after_line="banner" change model XXXX') @@ -7314,7 +7419,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern=r'''replace_line=(?P["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[14:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[14:-1] for posline,l in enumerate(split): if re.search(pattern, l): break @@ -7344,7 +7449,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern=r'''comment_line=(?P["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[14:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[14:-1] nb_mod = 0 for posline,l in enumerate(split): if re.search(pattern, l): @@ -7366,7 +7471,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern=r'''before_line=(?P["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[13:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[13:-1] for posline,l in enumerate(split): if re.search(pattern, l): break @@ -7383,7 +7488,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern = r'''after_line=(?P["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[12:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[12:-1] for posline,l in enumerate(split): if re.search(pattern, l): break @@ -7527,16 +7632,19 @@ def open_file(self, answer): answer = 'plot' else: answer = self.cards[int(answer)-self.integer_bias] - + path = '' if 'madweight' in answer: answer = answer.replace('madweight', 'MadWeight') elif 
'MadLoopParams' in answer: answer = self.paths['ML'] elif 'pythia8_card' in answer: answer = self.paths['pythia8'] + elif 'FO_analyse' in answer: + path = self.paths['FO_analyse'] + answer = 'fo_card' if os.path.exists(answer): path = answer - else: + elif not os.path.exists(path): if not '.dat' in answer and not '.lhco' in answer: if answer != 'trigger': path = self.paths[answer] @@ -7595,7 +7703,8 @@ def reload_card(self, path): logger.error('Please re-open the file and fix the problem.') logger.warning('using the \'set\' command without opening the file will discard all your manual change') elif path == self.paths['run']: - self.run_card = banner_mod.RunCard(path) + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + self.run_card = banner_mod.RunCard(path) elif path == self.paths['shower']: self.shower_card = shower_card_mod.ShowerCard(path) elif path == self.paths['ML']: @@ -7614,6 +7723,8 @@ def reload_card(self, path): except: import internal.madweight.Cards as mwcards self.mw_card = mwcards.Card(path) + elif path == self.paths['FO_analyse']: + self.fo_card = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse']) else: logger.debug('not keep in sync: %s', path) return path @@ -7629,6 +7740,9 @@ def scanparamcardhandling(input_path=lambda obj: pjoin(obj.me_dir, 'Cards', 'par iteratorclass=param_card_mod.ParamCardIterator, summaryorder=lambda obj: lambda:None, check_card=lambda obj: CommonRunCmd.static_check_param_card, + run_card_scan=False, + run_card_input= lambda obj: pjoin(obj.me_dir, 'Cards', 'run_card.dat'), + run_card_iteratorclass=banner_mod.RunCardIterator, ): """ This is a decorator for customizing/using scan over the param_card (or technically other) This should be use like this: @@ -7678,7 +7792,60 @@ def __enter__(self): def __exit__(self, ctype, value, traceback ): self.iterator.write(self.path) - def decorator(original_fct): + def scan_over_run_card(original_fct, obj, *args, **opts): + + if isinstance(input_path, str): + card_path = run_card_input + else: + card_path = run_card_input(obj) + + run_card_iterator = run_card_iteratorclass(card_path) + orig_card = copy.deepcopy(run_card_iterator.run_card) + if not run_card_iterator.run_card.scan_set: + return original_fct(obj, *args, **opts) + + + with restore_iterator(orig_card, card_path): + # this with statement ensure that the original card is restore + # whatever happens inside those block + + if not hasattr(obj, 'allow_notification_center'): + obj.allow_notification_center = False + with misc.TMP_variable(obj, 'allow_notification_center', False): + orig_name = get_run_name(obj) + if not orig_name and args[1]: + orig_name = args[1][0] + args = (args[0], args[1][1:]) + #orig_name = "scan_%s" % len(obj.results) + + try: + os.mkdir(pjoin(obj.me_dir, 'Events', orig_name)) + except Exception: + pass + next_name = orig_name + "_00" + + for i,card in enumerate(run_card_iterator): + card.write(card_path) + # still have to check for the auto-wdith + #if i !=0: + next_name = run_card_iterator.get_next_name(next_name) + set_run_name(obj)(next_name) + try: + original_fct(obj, *args, **opts) + except ignoreerror as error: + run_card_iterator.store_entry(next_name, {'exception': error}) + else: + run_card_iterator.store_entry(next_name, store_for_scan(obj)(), run_card_path=card_path) + + #param_card_iterator.write(card_path) #-> this is done by the with statement + name = misc.get_scan_name(orig_name, next_name) + path = result_path(obj) % name + logger.info("write scan results in %s" % path ,'$MG:BOLD') + order = 
summaryorder(obj)() + run_card_iterator.write_summary(path, order=order) + + + def decorator(original_fct): def new_fct(obj, *args, **opts): if isinstance(input_path, str): @@ -7702,8 +7869,13 @@ def new_fct(obj, *args, **opts): if not param_card_iterator: #first run of the function - original_fct(obj, *args, **opts) - return + if run_card_scan: + scan_over_run_card(original_fct, obj, *args, **opts) + return + else: + #first run of the function + original_fct(obj, *args, **opts) + return with restore_iterator(param_card_iterator, card_path): # this with statement ensure that the original card is restore diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/extended_cmd.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/extended_cmd.py index 2f37070580..789976beee 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/extended_cmd.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/extended_cmd.py @@ -624,12 +624,12 @@ def complete(self, text, state): compfunc = self.completenames # correct wrong splittion with '\ ' - if line and begidx > 2 and line[begidx-2:begidx] == '\ ': + if line and begidx > 2 and line[begidx-2:begidx] == r'\ ': Ntext = line.split(os.path.sep)[-1] - self.completion_prefix = Ntext.rsplit('\ ', 1)[0] + '\ ' + self.completion_prefix = Ntext.rsplit(r'\ ', 1)[0] + r'\ ' to_rm = len(self.completion_prefix) - 1 Nbegidx = len(line.rsplit(os.path.sep, 1)[0]) + 1 - data = compfunc(Ntext.replace('\ ', ' '), line, Nbegidx, endidx) + data = compfunc(Ntext.replace(r'\ ', ' '), line, Nbegidx, endidx) self.completion_matches = [p[to_rm:] for p in data if len(p)>to_rm] # correct wrong splitting with '-'/"=" @@ -742,7 +742,7 @@ def path_completion(text, base_dir = None, only_dirs = False, completion += [prefix + f for f in ['.'+os.path.sep, '..'+os.path.sep] if \ f.startswith(text) and not prefix.startswith('.')] - completion = [a.replace(' ','\ ') for a in completion] + completion = [a.replace(' ',r'\ ') for a in completion] return completion @@ -1253,7 +1253,7 @@ def check_answer_in_input_file(self, question_instance, default, path=False, lin return possibility[0] if '=' in line and ' ' in line.strip(): leninit = len(line) - line,n = re.subn('\s*=\s*','=', line) + line,n = re.subn(r'\s*=\s*','=', line) if n and len(line) != leninit: return self.check_answer_in_input_file(question_instance, default, path=path, line=line) @@ -1311,7 +1311,7 @@ def nice_error_handling(self, error, line): if os.path.exists(self.debug_output): os.remove(self.debug_output) try: - super(Cmd,self).onecmd('history %s' % self.debug_output.replace(' ', '\ ')) + super(Cmd,self).onecmd('history %s' % self.debug_output.replace(' ', r'\ ')) except Exception as error: logger.error(error) @@ -2001,6 +2001,7 @@ def write_configuration(self, filepath, basefile, basedir, to_keep): text = "" has_mg5_path = False # Use local configuration => Need to update the path + already_written = set() for line in open(basefile): if '=' in line: data, value = line.split('=',1) @@ -2018,9 +2019,12 @@ def write_configuration(self, filepath, basefile, basedir, to_keep): comment = '' if key in to_keep: value = str(to_keep[key]) - else: + elif line not in already_written: + already_written.add(line) text += line continue + else: + continue if key == 'mg5_path': has_mg5_path = True try: @@ -2032,14 +2036,20 @@ def write_configuration(self, filepath, basefile, basedir, to_keep): # check if absolute path if not os.path.isabs(value): value = os.path.realpath(os.path.join(basedir, value)) - text += '%s = %s # %s \n' % (key, value, comment) + new_line = 
'%s = %s # %s \n' % (key, value, comment) + if new_line not in already_written: + text += new_line + already_written.add(new_line) for key in to_write: if key in to_keep: - text += '%s = %s \n' % (key, to_keep[key]) + new_line = '%s = %s \n' % (key, to_keep[key]) + if new_line not in already_written: + text += new_line if not MADEVENT and not has_mg5_path: - text += """\n# MG5 MAIN DIRECTORY\n""" - text += "mg5_path = %s\n" % MG5DIR + if "mg5_path = %s\n" % MG5DIR not in already_written: + text += """\n# MG5 MAIN DIRECTORY\n""" + text += "mg5_path = %s\n" % MG5DIR writer = open(filepath,'w') writer.write(text) @@ -2190,7 +2200,7 @@ def onecmd(self, line, **opt): raise def reask(self, reprint_opt=True): - pat = re.compile('\[(\d*)s to answer\]') + pat = re.compile(r'\[(\d*)s to answer\]') prev_timer = signal.alarm(0) # avoid timer if any if prev_timer: @@ -2991,7 +3001,7 @@ def question_formatting(self, nb_col = 80, lpotential_switch=0, lnb_key=0, key=None): - """should return four lines: + r"""should return four lines: 1. The upper band (typically /========\ 2. The lower band (typically \========/ 3. The line without conflict | %(nb)2d. %(descrip)-20s %(name)5s = %(switch)-10s | @@ -3239,13 +3249,13 @@ def create_question(self, help_text=True): data_to_format['conflict_switch'] = self.color_for_value(key,self.inconsistent_keys[key], consistency=False) if hidden_line: - f2 = re.sub('%(\((?:name|descrip|add_info)\)-?)(\d+)s', + f2 = re.sub(r'%(\((?:name|descrip|add_info)\)-?)(\d+)s', lambda x: '%%%s%ds' % (x.group(1),int(x.group(2))+9), f2) text.append(f2 % data_to_format) elif hidden_line: if not f3: - f3 = re.sub('%(\((?:name|descrip|add_info)\)-?)(\d+)s', + f3 = re.sub(r'%(\((?:name|descrip|add_info)\)-?)(\d+)s', lambda x: '%%%s%ds' % (x.group(1),int(x.group(2))+9), f1) text.append(f3 % data_to_format) diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/file_writers.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/file_writers.py index 41bff05276..526756129f 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/file_writers.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/file_writers.py @@ -36,10 +36,10 @@ class FileWriter(io.FileIO): supported_preprocessor_commands = ['if'] preprocessor_command_re=re.compile( - "\s*(?P%s)\s*\(\s*(?P.*)\s*\)\s*{\s*"\ + r"\s*(?P%s)\s*\(\s*(?P.*)\s*\)\s*{\s*"\ %('|'.join(supported_preprocessor_commands))) preprocessor_endif_re=re.compile(\ - "\s*}\s*(?Pelse)?\s*(\((?P.*)\))?\s*(?P{)?\s*") + r"\s*}\s*(?Pelse)?\s*(\((?P.*)\))?\s*(?P{)?\s*") class FileWriterError(IOError): """Exception raised if an error occurs in the definition @@ -191,15 +191,15 @@ class FortranWriterError(FileWriter.FileWriterError): pass # Parameters defining the output of the Fortran writer - keyword_pairs = {'^if.+then\s*$': ('^endif', 2), - '^type(?!\s*\()\s*.+\s*$': ('^endtype', 2), - '^do(?!\s+\d+)\s+': ('^enddo\s*$', 2), - '^subroutine': ('^end\s*$', 0), - '^module': ('^end\s*$', 0), - 'function': ('^end\s*$', 0)} - single_indents = {'^else\s*$':-2, - '^else\s*if.+then\s*$':-2} - number_re = re.compile('^(?P\d+)\s+(?P.*)') + keyword_pairs = {r'^if.+then\s*$': ('^endif', 2), + r'^type(?!\s*\()\s*.+\s*$': ('^endtype', 2), + r'^do(?!\s+\d+)\s+': (r'^enddo\s*$', 2), + '^subroutine': (r'^end\s*$', 0), + '^module': (r'^end\s*$', 0), + 'function': (r'^end\s*$', 0)} + single_indents = {r'^else\s*$':-2, + r'^else\s*if.+then\s*$':-2} + number_re = re.compile(r'^(?P\d+)\s+(?P.*)') line_cont_char = '$' comment_char = 'c' uniformcase = True #force everyting to be lower/upper case @@ -212,7 +212,7 @@ 
class FortranWriterError(FileWriter.FileWriterError): # Private variables __indent = 0 __keyword_list = [] - __comment_pattern = re.compile(r"^(\s*#|c$|(c\s+([^=]|$))|cf2py|c\-\-|c\*\*|!)", re.IGNORECASE) + __comment_pattern = re.compile(r"^(\s*#|c\$|c$|(c\s+([^=]|$))|cf2py|c\-\-|c\*\*|\s*!|!\$)", re.IGNORECASE) __continuation_line = re.compile(r"(?: )[$&]") def write_line(self, line): @@ -424,26 +424,20 @@ def count_number_of_quotes(self, line): i = i + 1 return len(splitline)-1 - def remove_routine(self, text, fct_names, formatting=True): - """write the incoming text but fully removing the associate routine/function - text can be a path to a file, an iterator, a string - fct_names should be a list of functions to remove + @staticmethod + def get_routine(text, fct_names, call_back=None): + """ + get the fortran function from a fortran file """ - f77_type = ['real*8', 'integer', 'double precision', 'logical'] - pattern = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ + pattern = re.compile(r'^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ % {'type':'|'.join(f77_type)}, re.I) - + + if isinstance(text, str): + text = text.split('\n') + + to_write=False removed = [] - if isinstance(text, str): - if '\n' in text: - text = text.split('\n') - else: - text = open(text) - if isinstance(fct_names, str): - fct_names = [fct_names] - - to_write=True for line in text: fct = pattern.findall(line) if fct: @@ -451,22 +445,38 @@ def remove_routine(self, text, fct_names, formatting=True): to_write = False else: to_write = True - if to_write: - if formatting: - if line.endswith('\n'): - line = line[:-1] - self.writelines(line) - else: - if not line.endswith('\n'): - line = '%s\n' % line - super(FileWriter,self).writelines(line) + if call_back: + call_back(line) else: removed.append(line) - + return removed + + def remove_routine(self, text, fct_names, formatting=True): + """write the incoming text but fully removing the associate routine/function + text can be a path to a file, an iterator, a string + fct_names should be a list of functions to remove + """ + + def call_back(line): + if formatting: + if line.endswith('\n'): + line = line[:-1] + self.writelines(line) + else: + if not line.endswith('\n'): + line = '%s\n' % line + super(FileWriter,self).writelines(line) + + return self.get_routine(text, fct_names, call_back) + +class FortranWriter90(FortranWriter): + + comment_char = ' !' 
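A note on the recurring pattern in these hunks: nearly every change converts a regex literal such as '\s*' into a raw string r'\s*'. A plain string only works there through Python's invalid-escape fallback, which Python 3.12 reports as a SyntaxWarning at compile time (and a future release is expected to reject outright); the raw string compiles to the identical pattern. A minimal standalone sketch, independent of the generated code:

import re

# r'\[' and '\\[' denote the same two-character pattern; the raw string just
# avoids relying on the deprecated invalid-escape fallback of plain strings.
plain = re.compile('\\[(\\d*)s to answer\\]')   # explicit escaping
raw   = re.compile(r'\[(\d*)s to answer\]')     # raw string, as in the patch
assert plain.pattern == raw.pattern
assert raw.search('[12s to answer]').group(1) == '12'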
+ #=============================================================================== # CPPWriter #=============================================================================== @@ -497,50 +507,50 @@ class CPPWriterError(FileWriter.FileWriterError): '^private': standard_indent, '^protected': standard_indent} - spacing_patterns = [ - ('\s*\"\s*}', '\"'), - ('\s*,\s*', ', '), - ('\s*-\s*', ' - '), - ('([{(,=])\s*-\s*', '\g<1> -'), - ('(return)\s*-\s*', '\g<1> -'), - ('\s*\+\s*', ' + '), - ('([{(,=])\s*\+\s*', '\g<1> +'), - ('\(\s*', '('), - ('\s*\)', ')'), - ('\{\s*', '{'), - ('\s*\}', '}'), - ('\s*=\s*', ' = '), - ('\s*>\s*', ' > '), - ('\s*<\s*', ' < '), - ('\s*!\s*', ' !'), - ('\s*/\s*', '/'), - ('(?\s+>\s*', ' >> '), - ('<\s*double\s*>>\s*', ' > '), - ('\s*<\s+<\s*', ' << '), - ('\s*-\s+>\s*', '->'), - ('\s*=\s+=\s*', ' == '), - ('\s*!\s+=\s*', ' != '), - ('\s*>\s+=\s*', ' >= '), - ('\s*<\s+=\s*', ' <= '), - ('\s*&&\s*', ' && '), - ('\s*\|\|\s*', ' || '), - ('\s*{\s*}', ' {}'), - ('\s*;\s*', '; '), - (';\s*\}', ';}'), - (';\s*$}', ';'), - ('\s*<\s*([a-zA-Z0-9]+?)\s*>', '<\g<1>>'), - ('^#include\s*<\s*(.*?)\s*>', '#include <\g<1>>'), - ('(\d+\.{0,1}\d*|\.\d+)\s*[eE]\s*([+-]{0,1})\s*(\d+)', - '\g<1>e\g<2>\g<3>'), - ('\s+',' '), - ('^\s*#','#')] + spacing_patterns = [(r'\s*\"\s*}', '\"'), + (r'\s*,\s*', ', '), + (r'\s*-\s*', ' - '), + (r'([{(,=])\s*-\s*', r'\g<1> -'), + (r'(return)\s*-\s*', r'\g<1> -'), + (r'\s*\+\s*', ' + '), + (r'([{(,=])\s*\+\s*', r'\g<1> +'), + (r'\(\s*', '('), + (r'\s*\)', ')'), + (r'\{\s*', '{'), + (r'\s*\}', '}'), + (r'\s*=\s*', ' = '), + (r'\s*>\s*', ' > '), + (r'\s*<\s*', ' < '), + (r'\s*!\s*', ' !'), + (r'\s*/\s*', '/'), + (r'\s*\*\s*', ' * '), + (r'\s*-\s+-\s*', '-- '), + (r'\s*\+\s+\+\s*', '++ '), + (r'\s*-\s+=\s*', ' -= '), + (r'\s*\+\s+=\s*', ' += '), + (r'\s*\*\s+=\s*', ' *= '), + (r'\s*/=\s*', ' /= '), + (r'\s*>\s+>\s*', ' >> '), + (r'<\s*double\s*>>\s*', ' > '), + (r'\s*<\s+<\s*', ' << '), + (r'\s*-\s+>\s*', '->'), + (r'\s*=\s+=\s*', ' == '), + (r'\s*!\s+=\s*', ' != '), + (r'\s*>\s+=\s*', ' >= '), + (r'\s*<\s+=\s*', ' <= '), + (r'\s*&&\s*', ' && '), + (r'\s*\|\|\s*', ' || '), + (r'\s*{\s*}', ' {}'), + (r'\s*;\s*', '; '), + (r';\s*\}', ';}'), + (r';\s*$}', ';'), + (r'\s*<\s*([a-zA-Z0-9]+?)\s*>', r'<\g<1>>'), + (r'^#include\s*<\s*(.*?)\s*>', r'#include <\g<1>>'), + (r'(\d+\.{0,1}\d*|\.\d+)\s*[eE]\s*([+-]{0,1})\s*(\d+)', + r'\g<1>e\g<2>\g<3>'), + (r'\s+',' '), + (r'^\s*#','#')] + spacing_re = dict([(key[0], re.compile(key[0])) for key in \ spacing_patterns]) diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/gen_crossxhtml.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/gen_crossxhtml.py index d58ec573bc..681bf9d09b 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/gen_crossxhtml.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/gen_crossxhtml.py @@ -648,7 +648,7 @@ def recreate(self, banner): if run_card['ickkw'] != 0: #parse the file to have back the information pythia_log = misc.BackRead(pjoin(path, '%s_pythia.log' % tag)) - pythiare = re.compile("\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") + pythiare = re.compile(r"\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") for line in pythia_log: info = pythiare.search(line) if not info: @@ -1623,7 +1623,7 @@ def get_html(self, runresults): elif self.debug: text = str(self.debug).replace('. ','.
') if 'http' in text: - pat = re.compile('(http[\S]*)') + pat = re.compile(r'(http[\S]*)') text = pat.sub(r'<a href=\g<1>> here </a>', text) debug = '<br><br>%s<br>%s<br>
' % \ (self.debug.__class__.__name__, text) diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/gen_ximprove.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/gen_ximprove.py index c86da36a05..415ecc9de0 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/gen_ximprove.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/gen_ximprove.py @@ -158,8 +158,11 @@ def get_helicity(self, to_submit=True, clean=True): (stdout, _) = p.communicate(''.encode()) stdout = stdout.decode('ascii',errors='ignore') if stdout: - nb_channel = max([math.floor(float(d)) for d in stdout.split()]) + lines = stdout.strip().split('\n') + nb_channel = max([math.floor(float(d)) for d in lines[-1].split()]) else: + if os.path.exists(pjoin(self.me_dir, 'error')): + os.remove(pjoin(self.me_dir, 'error')) for matrix_file in misc.glob('matrix*orig.f', Pdir): files.cp(matrix_file, matrix_file.replace('orig','optim')) P_zero_result.append(Pdir) @@ -297,12 +300,14 @@ def get_helicity(self, to_submit=True, clean=True): bad_amps_perhel = [] if __debug__: mtext = open(matrix_file).read() - nb_amp = int(re.findall('PARAMETER \(NGRAPHS=(\d+)\)', mtext)[0]) - logger.debug('nb_hel: %s zero amp: %s bad_amps_hel: %s/%s', len(good_hels),len(bad_amps),len(bad_amps_perhel), len(good_hels)*nb_amp ) + nb_amp = int(re.findall(r'PARAMETER \(NGRAPHS=(\d+)\)', mtext)[0]) + logger.debug('(%s) nb_hel: %s zero amp: %s bad_amps_hel: %s/%s', split_file[-1], len(good_hels),len(bad_amps),len(bad_amps_perhel), len(good_hels)*nb_amp ) if len(good_hels) == 1: files.cp(matrix_file, matrix_file.replace('orig','optim')) continue # avoid optimization if onlye one helicity - recycler = hel_recycle.HelicityRecycler(good_hels, bad_amps, bad_amps_perhel) + + gauge = self.cmd.proc_characteristics['gauge'] + recycler = hel_recycle.HelicityRecycler(good_hels, bad_amps, bad_amps_perhel, gauge=gauge) # In case of bugs you can play around with these: recycler.hel_filt = self.run_card['hel_filtering'] recycler.amp_splt = self.run_card['hel_splitamp'] @@ -1299,7 +1304,7 @@ def get_job_for_event(self): 'script_name': 'unknown', 'directory': C.name, # need to be change for splitted job 'P_dir': C.parent_name, - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # needed for RO gridpack 'offset': 1, # need to be change for splitted job 'nevents': nevents, 'maxiter': self.max_iter, @@ -1387,7 +1392,7 @@ def create_ajob(self, template, jobs, write_dir=None): break info = jobs[j] info['script_name'] = 'ajob%i' % script_number - info['keeplog'] = 'false' + info['keeplog'] = 'false' if self.run_card['keep_log'] != 'debug' else 'true' if "base_directory" not in info: info["base_directory"] = "./" fsock.write(template_text % info) @@ -1456,7 +1461,7 @@ def get_job_for_precision(self): 'script_name': 'unknown', 'directory': C.name, # need to be change for splitted job 'P_dir': C.parent_name, - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # used for RO gridpack 'offset': 1, # need to be change for splitted job 'nevents': nevents, 'maxiter': self.max_iter, @@ -1916,7 +1921,7 @@ def get_job_for_event(self): 'directory': C.name, # need to be change for splitted job 'P_dir': os.path.basename(C.parent_name), 'offset': 1, # need to be change for splitted job - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # use for RO gridpack 'nevents': nevents, 
#int(nevents*self.gen_events_security)+1, 'maxiter': self.max_iter, 'miniter': self.min_iter, diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/hel_recycle.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/hel_recycle.py index dfa45d5d20..6c86611f68 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/hel_recycle.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/hel_recycle.py @@ -383,7 +383,7 @@ def get_number(cls, *args): class HelicityRecycler(): '''Class for recycling helicity''' - def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[]): + def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[], gauge='U'): External.good_hel = [] External.nhel_lines = '' @@ -427,6 +427,7 @@ def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[]): self.all_hel = [] self.hel_filt = True + self.gauge = gauge def set_input(self, file): if 'born_matrix' in file: @@ -612,7 +613,7 @@ def unfold_helicities(self, line, nature): def apply_amps(self, line, new_objs): if self.amp_splt: - return split_amps(line, new_objs) + return split_amps(line, new_objs, gauge=self.gauge) else: return apply_args(line, [i.args for i in new_objs]) @@ -785,7 +786,7 @@ def apply_args(old_line, all_the_args): return ''.join(new_lines) -def split_amps(line, new_amps): +def split_amps(line, new_amps, gauge): if not new_amps: return '' fct = line.split('(',1)[0].split('_0')[0] @@ -841,34 +842,31 @@ def split_amps(line, new_amps): spin = fct.split(None,1)[1][to_remove] lines.append('%sP1N_%s(%s)' % (fct, to_remove+1, ', '.join(args))) - hel, iamp = re.findall('AMP\((\d+),(\d+)\)', amp_result)[0] + hel, iamp = re.findall(r'AMP\((\d+),(\d+)\)', amp_result)[0] hel_calculated.append(hel) #lines.append(' %(result)s = TMP(3) * W(3,%(w)s) + TMP(4) * W(4,%(w)s)+' # % {'result': amp_result, 'w': windex}) #lines.append(' & TMP(5) * W(5,%(w)s)+TMP(6) * W(6,%(w)s)' # % {'result': amp_result, 'w': windex}) - if spin in "VF": - lines.append(""" call CombineAmp(%(nb)i, - & (/%(hel_list)s/), - & (/%(w_list)s/), - & TMP, W, AMP(1,%(iamp)s))""" % - {'nb': len(sub_amps), - 'hel_list': ','.join(hel_calculated), - 'w_list': ','.join(windices), - 'iamp': iamp - }) + if spin == "F" or ( spin == "V" and gauge !='FD'): + suffix = '' elif spin == "S": - lines.append(""" call CombineAmpS(%(nb)i, - &(/%(hel_list)s/), - & (/%(w_list)s/), - & TMP, W, AMP(1,%(iamp)s))""" % - {'nb': len(sub_amps), - 'hel_list': ','.join(hel_calculated), - 'w_list': ','.join(windices), - 'iamp': iamp - }) + suffix = 'S' + elif spin == "V" and gauge == "FD": + suffix = "FD" else: - raise Exception("split amp are not supported for spin2 and 3/2") + raise Exception("split amp not supported for spin2, 3/2") + + lines.append(""" call CombineAmp%(suffix)s(%(nb)i, + & (/%(hel_list)s/), + & (/%(w_list)s/), + & TMP, W, AMP(1,%(iamp)s))""" % {'suffix':suffix, + 'nb': len(sub_amps), + 'hel_list': ','.join(hel_calculated), + 'w_list': ','.join(windices), + 'iamp': iamp + }) + #lines.append('') return '\n'.join(lines) diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/histograms.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/histograms.py index 98f22c4c3a..9931127f66 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/histograms.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/histograms.py @@ -632,34 +632,34 @@ class HwU(Histogram): # than necessary because the HwU standard allows for spaces from within # the name of a weight weight_header_re = re.compile( - '&\s*(?P(\S|(\s(?!\s*(&|$))))+)(\s(?!(&|$)))*') + r'&\s*(?P(\S|(\s(?!\s*(&|$))))+)(\s(?!(&|$)))*') # 
================================ # Histo weight specification RE's # ================================ # The start of a plot - histo_start_re = re.compile('^\s*\s*(?P\d+)\s*"\s*'+ - '(?P(\S|(\s(?!\s*")))+)\s*"\s*$') + histo_start_re = re.compile(r'^\s*\s*(?P\d+)\s*"\s*'+ + r'(?P(\S|(\s(?!\s*")))+)\s*"\s*$') # A given weight specifier - a_float_re = '[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' - histo_bin_weight_re = re.compile('(?P%s|NaN)'%a_float_re,re.IGNORECASE) - a_int_re = '[\+|-]?\d+' + a_float_re = r'[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' + histo_bin_weight_re = re.compile(r'(?P%s|NaN)'%a_float_re,re.IGNORECASE) + a_int_re = r'[\+|-]?\d+' # The end of a plot histo_end_re = re.compile(r'^\s*<\\histogram>\s*$') # A scale type of weight - weight_label_scale = re.compile('^\s*mur\s*=\s*(?P%s)'%a_float_re+\ - '\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) - weight_label_PDF = re.compile('^\s*PDF\s*=\s*(?P\d+)\s*$') - weight_label_PDF_XML = re.compile('^\s*pdfset\s*=\s*(?P\d+)\s*$') - weight_label_TMS = re.compile('^\s*TMS\s*=\s*(?P%s)\s*$'%a_float_re) - weight_label_alpsfact = re.compile('^\s*alpsfact\s*=\s*(?P%s)\s*$'%a_float_re, + weight_label_scale = re.compile(r'^\s*mur\s*=\s*(?P%s)'%a_float_re+\ + r'\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) + weight_label_PDF = re.compile(r'^\s*PDF\s*=\s*(?P\d+)\s*$') + weight_label_PDF_XML = re.compile(r'^\s*pdfset\s*=\s*(?P\d+)\s*$') + weight_label_TMS = re.compile(r'^\s*TMS\s*=\s*(?P%s)\s*$'%a_float_re) + weight_label_alpsfact = re.compile(r'^\s*alpsfact\s*=\s*(?P%s)\s*$'%a_float_re, re.IGNORECASE) - weight_label_scale_adv = re.compile('^\s*dyn\s*=\s*(?P%s)'%a_int_re+\ - '\s*mur\s*=\s*(?P%s)'%a_float_re+\ - '\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) - weight_label_PDF_adv = re.compile('^\s*PDF\s*=\s*(?P\d+)\s+(?P\S+)\s*$') + weight_label_scale_adv = re.compile(r'^\s*dyn\s*=\s*(?P%s)'%a_int_re+\ + r'\s*mur\s*=\s*(?P%s)'%a_float_re+\ + r'\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) + weight_label_PDF_adv = re.compile(r'^\s*PDF\s*=\s*(?P\d+)\s+(?P\S+)\s*$') class ParseError(MadGraph5Error): @@ -926,7 +926,7 @@ def get_HwU_source(self, print_header=True): res.append(' '.join('%+16.7e'%wgt for wgt in list(bin.boundaries))) res[-1] += ' '.join('%+16.7e'%bin.wgts[key] for key in self.bins.weight_labels if key not in ['central','stat_error']) - res.append('<\histogram>') + res.append(r'<\histogram>') return res def output(self, path=None, format='HwU', print_header=True): @@ -1149,6 +1149,8 @@ def parse_one_histo_from_stream(self, stream, all_weight_header, boundaries = [0.0,0.0] for j, weight in \ enumerate(HwU.histo_bin_weight_re.finditer(line_bin)): + if (j == len(weight_header)): + continue if j == len(all_weight_header): raise HwU.ParseError("There is more bin weights"+\ " specified than expected (%i)"%len(weight_header)) @@ -1803,7 +1805,7 @@ def parse_histos_from_PY8_XML_stream(self, stream, run_id=None, # Filter empty weights coming from the split weight_label_list = [wgt.strip() for wgt in str(selected_run_node.getAttribute('header')).split(';') if - not re.match('^\s*$',wgt)] + not re.match(r'^\s*$',wgt)] ordered_weight_label_list = [w for w in weight_label_list if w not\ in ['xmin','xmax']] # Remove potential repetition of identical weight labels @@ -1827,7 +1829,7 @@ def parse_histos_from_PY8_XML_stream(self, stream, run_id=None, all_weights = [] for wgt_position, wgt_label in \ enumerate(str(selected_run_node.getAttribute('header')).split(';')): - if not re.match('^\s*$',wgt_label) is None: + if not 
re.match(r'^\s*$',wgt_label) is None: continue all_weights.append({'POSITION':wgt_position}) for wgt_item in wgt_label.strip().split('_'): @@ -2714,7 +2716,7 @@ def ratio_no_correlations(wgtsA, wgtsB): # First the global gnuplot header for this histogram group global_header =\ -""" +r""" ################################################################################ ### Rendering of the plot titled '%(title)s' ################################################################################ @@ -2862,9 +2864,9 @@ def ratio_no_correlations(wgtsA, wgtsB): major_title = ', '.join(major_title) if not mu[0] in ['none',None]: - major_title += ', dynamical\_scale\_choice=%s'%mu[0] + major_title += r', dynamical\_scale\_choice=%s'%mu[0] if not pdf[0] in ['none',None]: - major_title += ', PDF=%s'%pdf[0].replace('_','\_') + major_title += ', PDF=%s'%pdf[0].replace('_',r'\_') # Do not show uncertainties for individual jet samples (unless first # or specified explicitely and uniquely) @@ -2937,7 +2939,7 @@ def ratio_no_correlations(wgtsA, wgtsB): plot_lines.append( "'%s' index %d using (($1+$2)/2):%d ls %d title '%s'"\ %(HwU_name,block_position+i,mu_var+3,color_index,\ -'%s dynamical\_scale\_choice=%s' % (title,mu[j]))) +r'%s dynamical\_scale\_choice=%s' % (title,mu[j]))) # And now PDF_variation if available if not PDF_var_pos is None: for j,PDF_var in enumerate(PDF_var_pos): @@ -2947,7 +2949,7 @@ def ratio_no_correlations(wgtsA, wgtsB): plot_lines.append( "'%s' index %d using (($1+$2)/2):%d ls %d title '%s'"\ %(HwU_name,block_position+i,PDF_var+3,color_index,\ -'%s PDF=%s' % (title,pdf[j].replace('_','\_')))) +'%s PDF=%s' % (title,pdf[j].replace('_',r'\_')))) # Now add the uncertainty lines, those not using a band so that they # are not covered by those using a band after we reverse plo_lines diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/launch_plugin.py index 7b10bedcef..9ec09eb71d 100644 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/launch_plugin.py @@ -98,6 +98,7 @@ def default_setup(self): self['vector_size'] = 16 # already setup in default class (just change value) self['aloha_flag'] = '--fast-math' self['matrix_flag'] = '-O3' + self['limhel'] = 0 self.display_block.append('simd') self.display_block.append('psoptim') diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/lhe_parser.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/lhe_parser.py index eee9ba4522..f6e47956cd 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/lhe_parser.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/lhe_parser.py @@ -7,6 +7,7 @@ import numbers import math import time +import copy import os import shutil import sys @@ -101,7 +102,7 @@ def __init__(self, line=None, event=None): self.rwgt = 0 return - + self.event = event if event is not None: self.event_id = len(event) #not yet in the event @@ -1066,6 +1067,8 @@ def define_init_banner(self, wgt, lha_strategy, proc_charac=None): #special case for 1>N init_information = run_card.get_banner_init_information() event = next(self) + if not len(event): #if parse-momenta was false we have to parse the first event + event = Event(str(event)) init_information["idbmup1"] = event[0].pdg init_information["ebmup1"] = event[0].mass init_information["idbmup2"] = 0 @@ -1149,6 +1152,7 @@ def initialize_unweighting(self, getwgt, trunc_error): nb_keep = max(20, int(nb_event*trunc_error*15)) new_wgt = new_wgt[-nb_keep:] if nb_event == 0: + misc.sprint(i,f) raise 
Exception # store the information self.initial_nb_events[i] = nb_event @@ -1203,7 +1207,6 @@ def unweight(self, outputpath, get_wgt, **opts): (stop to write event when target is reached) """ - if isinstance(get_wgt, (str,six.text_type)): unwgt_name =get_wgt def get_wgt_multi(event): @@ -1483,12 +1486,12 @@ def reorder_mother_child(self): particle.mother2 -= 1 # re-call the function for the next potential change return self.reorder_mother_child() - - - - - + + + + + def parse_reweight(self): """Parse the re-weight information in order to return a dictionary {key: value}. If no group is define group should be '' """ @@ -1522,6 +1525,37 @@ def parse_nlo_weight(self, real_type=(1,11), threshold=None): threshold=threshold) return self.nloweight + def get_fks_pair(self, real_type=(1,11), threshold=None): + """ Gives the fks pair labels""" + start, stop = self.tag.find(''), self.tag.find('') + if start != -1 != stop: + text = self.tag[start+8:stop] + all_line = text.split('\n') + text = text.lower().replace('d','e') + all_line = text.split('\n') + for line in all_line: + data = line.split() + if len(data)>16: + wgt = OneNLOWeight(line, real_type=real_type) + return wgt.to_merge_pdg,wgt.nexternal + + def get_born_momenta(self,real_type=(1,11), threshold=None): + """ Gets the underlying n+1 body kinematics""" + start, stop = self.tag.find(''), self.tag.find('') + if start != -1 != stop: + text = self.tag[start+8:stop] + text = text.lower().replace('d','e') + all_line = text.split('\n') + for line in all_line: + data = line.split() + if len(data)>16: + wgt = OneNLOWeight(line, real_type=real_type) + nexternal = wgt.nexternal + real_momenta = all_line[2:2+nexternal] + return real_momenta + + + def rewrite_nlo_weight(self, wgt=None): """get the string associate to the weight""" @@ -1558,11 +1592,11 @@ def parse_lo_weight(self): return self.loweight if not hasattr(Event, 'loweight_pattern'): - Event.loweight_pattern = re.compile('''\s*(?P\d+)\s+(?P[\d.e+-]+)\s*\s*\n\s* - \s*(?P[\s\d.+-e]+)\s*\s*\n\s* - 1|2)["']?\>\s*(?P[\s\d.e+-]*)\s*\s*\n\s* - 1|2)["']?\>\s*(?P[\s\d.e+-]*)\s*\s*\n\s* - \s*(?P[\d.e+-]*)\s* + Event.loweight_pattern = re.compile('''\\s*(?P\\d+)\\s+(?P[\\d.e+-]+)\\s*\\s*\n\\s* + \\s*(?P[\\s\\d.+-e]+)\\s*\\s*\n\\s* + 1|2)["']?\\>\\s*(?P[\\s\\d.e+-]*)\\s*\\s*\n\\s* + 1|2)["']?\\>\\s*(?P[\\s\\d.e+-]*)\\s*\\s*\n\\s* + \\s*(?P[\\d.e+-]*)\\s* ''',re.X+re.I+re.M) start, stop = self.tag.find(''), self.tag.find('') @@ -1615,7 +1649,7 @@ def parse_matching_scale(self): self.matched_scale_data = [] - pattern = re.compile("") + pattern = re.compile(r"") data = re.split(pattern,self.tag) if len(data) == 1: return [] @@ -1623,7 +1657,7 @@ def parse_matching_scale(self): tmp = {} start,content, end = data self.tag = "%s%s" % (start, end) - pattern = re.compile("pt_clust_(\d*)=\"([\de+-.]*)\"") + pattern = re.compile("pt_clust_(\\d*)=\"([\\de+-.]*)\"") for id,value in pattern.findall(content): tmp[int(id)] = float(value) for i in range(1, len(self)+1): @@ -1647,7 +1681,7 @@ def parse_syscalc_info(self): return self.syscalc_data pattern = re.compile("|") - pattern2 = re.compile("<(?P[\w]*)(?:\s*(\w*)=[\"'](.*)[\"']\s*|\s*)>(.*)") + pattern2 = re.compile("<(?P[\\w]*)(?:\\s*(\\w*)=[\"'](.*)[\"']\\s*|\\s*)>(.*)") data = re.split(pattern,self.tag) if len(data) == 1: return [] @@ -1850,6 +1884,240 @@ def get_decay(self, pdg_code=0, event_id=None): return new_event + + def set_initial_mass_to_zero(self): + """set the masses of the initial particles to zero, by reshuffling the respective momenta + Works only in 
the **partonic** com frame, so the event must be boosted to such frame + before calling the function + """ + + if not misc.equal(self[0].px, 0) or not misc.equal(self[1].px, 0) or \ + not misc.equal(self[0].py, 0) or not misc.equal(self[1].py, 0) or \ + not misc.equal(self[0].pz, - self[1].pz, zero_limit=False): + misc.sprint(self[0]) + misc.sprint(self[1]) + raise Exception('momenta should be in the partonic center of mass frame') + + self[0].mass = 0. + self[1].mass = 0. + tot_E=0. + for ip,part in enumerate(self): + if part.status == 1 : + tot_E += part.E + if (self[0].pz > 0. and self[1].pz < 0): + self[0].set_momentum(FourMomentum([tot_E/2., 0., 0., tot_E/2.])) + self[1].set_momentum(FourMomentum([tot_E/2., 0., 0., -tot_E/2.])) + elif (self[0].pz < 0. and self[1].pz > 0): + self[0].set_momentum(FourMomentum([tot_E/2., 0., 0., -tot_E/2.])) + self[1].set_momentum(FourMomentum([tot_E/2., 0., 0., tot_E/2.])) + else: + logger.critical('ERROR: two incoming partons not back.-to-back') + + def set_final_jet_mass_to_zero(self): + """set the final light particle masses to zero + """ + + for ip,part in enumerate(self): + if ((abs(part.pid) <= 5) or (abs(part.pid) == 11) or (abs(part.pid) == 12)) and (part.status == 1): + part.mass = 0. + E_1_new = math.sqrt(part.mass**2 + part.px**2 + part.py**2 + part.pz**2) + part.set_momentum(FourMomentum([E_1_new, part.px, part.py, part.pz])) + + + + def merge_particles_kinematics(self, i,j, moth): + """Map to an underlying n-body kinematics for two given + particles i,j to be merged and a resulting moth""" + """ note! kinematics (and id) mapping only! """ + + recoil = True + fks_type = False + + if recoil and not fks_type: + if (i == moth[0].get('number')-1): + fks_i = i + fks_j = j + elif (j == moth[0].get('number')-1): + fks_i = j + fks_j = i + to_remove = fks_j + + merge_i = self[fks_i] + merge_j = self[fks_j] + + i_4mom = FourMomentum(merge_i) + j_4mom = FourMomentum(merge_j) + if (fks_i <= 1): + sign1 = -1.0 + else: + sign1 = 1.0 + mother_4mom = i_4mom + sign1*j_4mom + + new_event = copy.deepcopy(self) + + self[fks_i].pid = moth[0]['id'] + self[fks_i].set_momentum(mother_4mom) + + if fks_i <= 1: # initial-state recoil + new_p = FourMomentum() + for ip,part in enumerate(self): + if (ip != fks_i and ip != fks_j and ip >= 2): + new_p += part + + if fks_i == 0: + self[1].set_momentum(new_p - FourMomentum(self[0])) + elif fks_i == 1: + self[0].set_momentum(new_p - FourMomentum(self[1])) + + pz_1_new = self.recoil_eq(self[0],self[1]) + pz_2_new = self[0].pz + self[1].pz - pz_1_new + E_1_new = math.sqrt(self[0].mass**2 + self[0].px**2 + self[0].py**2 + pz_1_new **2) + E_2_new = math.sqrt(self[1].mass**2 + self[1].px**2 + self[1].py**2 + pz_2_new **2) + self[0].set_momentum(FourMomentum([E_1_new,self[0].px,self[0].py,pz_1_new])) + self[1].set_momentum(FourMomentum([E_2_new,self[1].px,self[1].py,pz_2_new])) + self.pop(to_remove) + + if fks_i > 1: # final-state recoil + + # Re-scale the energy of fks_i to make it on-shell + for ip,part in enumerate(self): + if (ip == fks_i): + part.E = math.sqrt(part.mass**2 + part.px**2 + part.py**2 + part.pz**2) + new_p.E = part.E + + # Find the overall energy in the final state + new_p.E = 0.0 + for ip,part in enumerate(self): + if (ip != fks_j and ip >= 2): + new_p.E += part.E + + # Use one of the initial states to absorb the energy change in the final state + self[1].set_momentum(FourMomentum([new_p.E-self[0].E,self[1].px,self[1].py,self[1].pz])) + + # Change the initial state pz and E + pz_1_new = 
self.recoil_eq(self[0],self[1]) + pz_2_new = self[0].pz + self[1].pz - pz_1_new + E_1_new = math.sqrt(self[0].mass**2 + self[0].px**2 + self[0].py**2 + pz_1_new **2) + E_2_new = math.sqrt(self[1].mass**2 + self[1].px**2 + self[1].py**2 + pz_2_new **2) + self[0].set_momentum(FourMomentum([E_1_new,self[0].px,self[0].py,pz_1_new])) + self[1].set_momentum(FourMomentum([E_2_new,self[1].px,self[1].py,pz_2_new])) + self.pop(to_remove) + + elif fks_type and not recoil: + ## Do it in a more FKS-style + if (i == moth[0].get('number')-1): + fks_i = i + fks_j = j + elif (j == moth[0].get('number')-1): + fks_i = j + fks_j = i + to_remove = fks_j + new_event = copy.copy(event) + + if fks_i <= 1: # initial-state recoil + + # First boost to partonic CM frame + q = FourMomentum(self[0])+FourMomentum(self[1]) + for ip,part in enumerate(self): + vec = FourMomentum(part) + self[ip].set_momentum(vec.zboost(pboost=q)) + + k_tot = FourMomentum([self[0].E+self[1].E-self[fks_j].E,self[0].px+self[1].px-self[fks_j].px,\ + self[0].py+self[1].py-self[fks_j].py,self[0].pz+self[1].pz-self[fks_j].pz]) + + final = FourMomentum([0,0,0,0]) + for ip,part in enumerate(self): + vec = FourMomentum([part.E,part.px,part.py,part.pz]) + if (ip != fks_i and ip != fks_j and ip >= 2): + final = final + vec + + s = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz])**2 + ksi = self[fks_j].E/(math.sqrt(s)/2.0) + y = self[fks_j].pz/self[fks_j].E + + self[0].pz = self[0].pz * math.sqrt(1.0-ksi)*math.sqrt((2.0-ksi*(1.0+y))/((2.0-ksi*(1.0-y)))) + self[0].E = math.sqrt(self[0].mass**2 + self[0].pz**2) + self[1].pz = self[1].pz * math.sqrt(1.0-ksi)*math.sqrt((2.0-ksi*(1.0-y))/((2.0-ksi*(1.0+y)))) + self[1].E = math.sqrt(self[1].mass**2 + self[1].pz**2) + + final = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz]) + + k_tot_1 = k_tot.zboost(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + k_tot_2 = k_tot_1.pt_boost(pboost=FourMomentum([k_tot_1.E,k_tot_1.px,k_tot_1.py,k_tot_1.pz])) + k_tot_3 = k_tot_2.zboost_inv(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + + for ip,part in enumerate(self): + if (ip >= 2): + vec = FourMomentum([part.E,part.px,part.py,part.pz]) + vec2 = vec.zboost(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + vec3 = vec2.pt_boost(pboost=FourMomentum([k_tot_1.E,k_tot_1.px,k_tot_1.py,k_tot_1.pz])) + vec_new = vec3.zboost_inv(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + self[ip].set_momentum(FourMomentum([vec_new.E,vec_new.px,vec_new.py,vec_new.pz])) + + self.pop(to_remove) + + else: # final-state recoil + q = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz]) + + for ip,part in enumerate(self): + vec = FourMomentum([part.E,part.px,part.py,part.pz]) + self[ip].set_momentum(vec.zboost(pboost=q)) + + q = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz]) + + k = FourMomentum([self[fks_i].E+self[fks_j].E,self[fks_i].px+self[fks_j].px,\ + self[fks_i].py+self[fks_j].py,self[fks_i].pz+self[fks_j].pz]) + + k_rec = FourMomentum([0,0,0,0]) + for ip,part in enumerate(self): + if ip >= 2 and ip != fks_i and ip != fks_j: # add only final-states to the recoil and not the FKS pair + k_rec = k_rec + FourMomentum([part.E,part.px,part.py,part.pz]) + + k_mom = math.sqrt(k_rec.px**2 + k_rec.py**2 + k_rec.pz**2) + beta = (q**2 - (k_rec.E+k_mom)**2)/(q**2 + (k_rec.E+k_mom)**2) + 
for ip,part in enumerate(self): + if ip >= 2 and ip != fks_i and ip != fks_j: + vec = FourMomentum([self[ip].E,self[ip].px,self[ip].py,self[ip].pz]) + self[ip].set_momentum(vec.boost_beta(beta,k_rec)) + if ip == fks_i: + self[ip].set_momentum(q - k_rec.boost_beta(beta,k_rec)) + self.pop(to_remove) + else: + logger.info('Error in Sudakov Born mapping: no recoil scheme found!') + + def recoil_eq(self,part1, part2): + """ In general, solves the equation + E1 + E2 = K + p1 + p2 = c + E1^2 - p1^2 = a + E2^2 - p2^2 = b + and returns p1 + """ + thresh = 1e-6 + import random + a = part1.mass**2 + part1.px**2 + part1.py**2 + b = part2.mass**2 + part2.px**2 + part2.py**2 + c = part1.pz + part2.pz + K = part1.E + part2.E + K2 = K**2 + sol1 = (-a*c + b*c + c**3 - c*K2 - math.sqrt(K2*(a**2 + (b + c**2 - K2)**2 - 2*a*(b - c**2 + K2))))/(2*(c**2-K2)) + sol2 = (-a*c + b*c + c**3 - c*K2 + math.sqrt(K2*(a**2 + (b + c**2 - K2)**2 - 2*a*(b - c**2 + K2))))/(2*(c**2-K2)) + + if abs(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2) - (math.sqrt(a+sol2**2) + math.sqrt(b+(c-sol2)**2))) > thresh: + logger.critical('Error in recoil_eq solver 1') + logger.critical(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2)) + logger.critical(math.sqrt(a+sol2**2) + math.sqrt(b+(c-sol2)**2)) + if abs(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2) - K) > thresh: + logger.critical('Error in recoil_eq solver 2') + logger.critical(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2)) + logger.critical(K) + return sol1 + + def boost(self, filter=None): """modify the current event to boost it according to the current filter""" if filter is None: @@ -1861,7 +2129,7 @@ def boost(self, filter=None): if list(filter(p)): pboost += p else: - pboost = FourMomentum(pboost) + pboost = FourMomentum(filter) # change sign of three-component due to helas convention pboost.px *=-1 @@ -1877,7 +2145,7 @@ def check(self): """check various property of the events""" # check that relative error is under control - threshold = 1e-6 + threshold = 1e-4 #1. Check that the 4-momenta are conserved E, px, py, pz = 0,0,0,0 @@ -1920,7 +2188,50 @@ def check(self): self.check_color_structure() #3. check mass - + + def check_kinematics_only(self): + """check various property of the events - only kinematics""" + + # check that relative error is under control + threshold = 1e-3 + + #1. 
Check that the 4-momenta are conserved + E, px, py, pz = 0,0,0,0 + absE, abspx, abspy, abspz = 0,0,0,0 + for particle in self: + coeff = 1 + if particle.status == -1: + coeff = -1 + elif particle.status != 1: + continue + E += coeff * particle.E + absE += abs(particle.E) + px += coeff * particle.px + py += coeff * particle.py + pz += coeff * particle.pz + abspx += abs(particle.px) + abspy += abs(particle.py) + abspz += abs(particle.pz) + # check mass + fourmass = FourMomentum(particle).mass + + if particle.mass and (abs(particle.mass) - fourmass)/ abs(particle.mass) > threshold: + logger.critical(self) + raise Exception( "Do not have correct mass lhe: %s momentum: %s (error at %s" % (particle.mass, fourmass, (abs(particle.mass) - fourmass)/ abs(particle.mass))) + + if abs(E/absE) > threshold: + logger.critical(self) + raise Exception("Do not conserve Energy %s, %s" % (E/absE, E)) + if abs(px/abspx) > threshold: + logger.critical(self) + raise Exception("Do not conserve Px %s, %s" % (px/abspx, px)) + if abs(py/abspy) > threshold: + logger.critical(self) + raise Exception("Do not conserve Py %s, %s" % (py/abspy, py)) + if abs(pz/abspz) > threshold: + logger.critical(self) + raise Exception("Do not conserve Pz %s, %s" % (pz/abspz, pz)) + def assign_scale_line(self, line, convert=True): """read the line corresponding to global event line @@ -2764,7 +3075,7 @@ def zboost(self, pboost=None, E=0, pz=0): if isinstance(pboost, FourMomentum): E = pboost.E pz = pboost.pz - + #beta = pz/E gamma = E / math.sqrt(E**2-pz**2) gammabeta = pz / math.sqrt(E**2-pz**2) @@ -2778,6 +3089,74 @@ def zboost(self, pboost=None, E=0, pz=0): out.pz = 0 return out + def zboost_inv(self, pboost=None, E=0, pz=0): + """Both momenta should be in the same frame. + The boost perform correspond to the boost required to set pboost at + rest (only z boost applied). + """ + if isinstance(pboost, FourMomentum): + E = pboost.E + pz = pboost.pz + + #beta = pz/E + gamma = E / math.sqrt(E**2-pz**2) + gammabeta = pz / math.sqrt(E**2-pz**2) + + out = FourMomentum([gamma*self.E + gammabeta*self.pz, + self.px, + self.py, + gamma*self.pz + gammabeta*self.E]) + + if abs(out.pz) < 1e-6 * out.E: + out.pz = 0 + return out + + + def pt_boost(self, pboost=None, E=0, pz=0): + """Both momenta should be in the same frame. + The boost perform correspond to the boost required to set pboost at + rest (only pT boost applied). 
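+ (Sketch of the convention used below: with beta = (px, py)/E and
+ gamma = 1/sqrt(1 - beta^2), the spatial block is
+ delta_ij + (gamma - 1)*beta_i*beta_j/beta^2, and pz is left untouched.)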
+ """ + + if isinstance(pboost, FourMomentum): + E = pboost.E + px = pboost.px + py = pboost.py + mass = math.sqrt(E**2 - px**2 - py**2) + + betax = px/E + betay = py/E + beta = math.sqrt(betax**2+betay**2) + gamma = 1 / math.sqrt(1.0-beta**2) + + out = FourMomentum([gamma*self.E - gamma*betax*self.px - gamma*betay*self.py, + -gamma*betax*self.E + (1.0 + (gamma-1.0)*betax**2/(beta**2))*self.px + (gamma-1.0)*betax*betay/(beta**2)*self.py, + -gamma*betay*self.E + ((gamma-1.0)*betax*betay/(beta**2))*self.px + (1.0+(gamma-1.0)*(betay**2)/(beta**2))*self.py, + self.pz]) + + if abs(out.px) < 1e-6 * out.E: + out.px = 0 + if abs(out.py) < 1e-6 * out.E: + out.py = 0 + return out + + def boost_beta(self,beta,mom): + """ Boost along the three-momentum of mom with a boost of size beta""" + + unit = mom * (1.0/math.sqrt(mom.px**2+mom.py**2+mom.pz**2)) + beta_vec = beta*unit + bx = beta_vec.px + by = beta_vec.py + bz = beta_vec.pz + gamma = 1.0 / math.sqrt(1.0-beta**2) + + out = FourMomentum([gamma*self.E - gamma*bx*self.px - gamma*by*self.py - gamma*bz*self.pz, + -gamma*bx*self.E + (1.0 + (gamma-1.0)*bx**2/(beta**2))*self.px + (gamma-1.0)*bx*by/(beta**2)*self.py + (gamma-1.0)*bx*bz/(beta**2)*self.pz, + -gamma*by*self.E + ((gamma-1.0)*bx*by/(beta**2))*self.px + (1.0+(gamma-1.0)*(by**2)/(beta**2))*self.py + (gamma-1.0)*by*bz/(beta**2)*self.pz, + -gamma*bz*self.E + (gamma-1.0)*bx*bz/(beta**2)*self.px + (gamma-1.0)*(by*bz)/(beta**2)*self.py + (1.0+(gamma-1.0)*bz**2/(beta**2))*self.pz]) + + return out + def boost_to_restframe(self, pboost): """apply the boost transformation such that pboost is at rest in the new frame. First apply a rotation to allign the pboost to the z axis and then use @@ -2789,27 +3168,64 @@ def boost_to_restframe(self, pboost): return out - # write pboost as (E, p cosT sinF, p sinT sinF, p cosF) - # rotation such that it become (E, 0 , 0 , p ) is - # cosT sinF , -sinT , cosT sinF - # sinT cosF , cosT , sinT sinF - # -sinT , 0 , cosF - p = math.sqrt( pboost.px**2 + pboost.py**2+ pboost.pz**2) - cosF = pboost.pz / p - sinF = math.sqrt(1-cosF**2) - sinT = pboost.py/p/sinF - cosT = pboost.px/p/sinF - - out=FourMomentum([self.E, - self.px*cosT*cosF + self.py*sinT*cosF-self.pz*sinF, - -self.px*sinT+ self.py*cosT, - self.px*cosT*sinF + self.py*sinT*sinF + self.pz*cosF - ]) - out = out.zboost(E=pboost.E,pz=p) + # see here https://physics.stackexchange.com/questions/749036/general-lorentz-boost-of-four-momentum-in-cm-frame-particle-physics + vx = pboost.px/pboost.E + vy = pboost.py/pboost.E + vz = pboost.pz/pboost.E + v = pboost.norm/pboost.E + v2 = pboost.norm_sq/pboost.E**2 + gamma = 1./math.sqrt(1.-v**2) + gammo = gamma-1. 
+ out = FourMomentum(E = gamma*(self.E - vx*self.px - vy*self.py - vz*self.pz), + px= -gamma*vx*self.E + (1+gammo*vx**2/v2)*self.px + gammo*vx*vy/v2*self.py + gammo*vx*vz/v2*self.pz, + py= -gamma*vy*self.E + gammo*vy*vx/v2*self.px + (1+gammo*vy**2/v2)*self.py + gammo*vy*vz/v2*self.pz, + pz= -gamma*vz*self.E + gammo*vz*vx/v2*self.px + gammo*vz*vy/v2*self.py + (1+gammo*vz**2/v2)*self.pz) + return out + def rotate_to_z(self,prot): + + import math + import numpy as np + + z = np.array([0.,0.,1.]) + + px = self.px + py = self.py + pz = self.pz + + refx = prot.px + refy = prot.py + refz = prot.pz + + prot_mom = np.array([px, py, pz]) + ref_mom = np.array([refx, refy, refz]) + + # Create normal vector + n = np.array([refy, -refx, 0.]) + n = n * 1./math.sqrt(self.threedot(n,n)) + t = prot_mom - self.threedot(n,prot_mom)*n + p = ref_mom - self.threedot(ref_mom,z)*z + p = p/math.sqrt(self.threedot(p,p)) + + t_pz = np.array([self.threedot(t,p), self.threedot(t,z), 0.]) + costheta = self.threedot(ref_mom,z)* 1./math.sqrt(self.threedot(ref_mom, ref_mom)) + sintheta=math.sqrt(1.-costheta**2) + + sgn = 1. + t_pz_p = np.array([0., 0., 0.]) + t_pz_p[0] = costheta*t_pz[0] + sgn*(-sintheta) * t_pz[1] + t_pz_p[1] = sgn*sintheta*t_pz[0] + costheta * t_pz[1] + + out_mom = self.threedot(n,prot_mom)*n + t_pz_p[0]*p + t_pz_p[1]*z + + out = FourMomentum([self.E,out_mom[0], out_mom[1], out_mom[2] ] ) + + return out - + def threedot(self,a,b): + + return a[0]*b[0]+a[1]*b[1]+a[2]*b[2] class OneNLOWeight(object): diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/madevent_interface.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/madevent_interface.py index 2a118e21bf..8e30cf690c 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/madevent_interface.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/madevent_interface.py @@ -496,7 +496,6 @@ def help_remove(self): logger.info(" the optional '-f' allows to by-pass all security question") logger.info(" The banner can be remove only if all files are removed first.") - class AskRun(cmd.ControlSwitch): """a class for the question on what to do on a madevent run""" @@ -2393,13 +2392,17 @@ def do_generate_events(self, line): # Check argument's validity mode = self.check_generate_events(args) switch_mode = self.ask_run_configuration(mode, args) - if not args: - # No run name assigned -> assigned one automaticaly - self.set_run_name(self.find_available_run_name(self.me_dir), None, 'parton') - else: - self.set_run_name(args[0], None, 'parton', True) - args.pop(0) - + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + run_card = banner_mod.RunCard(pjoin(self.me_dir, 'Cards', 'run_card.dat'), consistency=False) + if not run_card.scan_set: + if not args: + # No run name assigned -> assigned one automaticaly + self.set_run_name(self.find_available_run_name(self.me_dir), None, 'parton') + else: + self.set_run_name(args[0], None, 'parton', True) + args.pop(0) + + self.run_generate_events(switch_mode, args) self.postprocessing() @@ -2560,7 +2563,7 @@ def wait_monitoring(Idle, Running, Done): self.update_status("postprocessing contur done", level="rivet") # this decorator handle the loop related to scan. 
- @common_run.scanparamcardhandling() + @common_run.scanparamcardhandling(run_card_scan=True) def run_generate_events(self, switch_mode, args): if self.proc_characteristics['loop_induced'] and self.options['run_mode']==0: @@ -2593,7 +2596,6 @@ def run_generate_events(self, switch_mode, args): # Regular run mode logger.info('Generating %s events with run name %s' % (self.run_card['nevents'], self.run_name)) - self.exec_cmd('survey %s %s' % (self.run_name,' '.join(args)), postcmd=False) nb_event = self.run_card['nevents'] @@ -2975,7 +2977,7 @@ def update_width_in_param_card(decay_info, initial=None, output=None): particle = 0 # Read BRs for this decay line = param_card[line_number] - while re.search('^(#|\s|\d)', line): + while re.search(r'^(#|\s|\d)', line): line = param_card.pop(line_number) if not particle or line.startswith('#'): line=param_card[line_number] @@ -3226,7 +3228,7 @@ def do_treatcards(self, line, mode=None, opt=None): for line in open(pjoin(bias_module_path,'%s.f'%os.path.basename(bias_module_path))): if start and last: break - if not start and not re.search('c\s*parameters\s*=\s*{',line, re.I): + if not start and not re.search(r'c\s*parameters\s*=\s*{',line, re.I): continue start = True if not line.startswith('C'): @@ -3235,7 +3237,7 @@ def do_treatcards(self, line, mode=None, opt=None): if '{' in line: line = line.split('{')[-1] # split for } ! # - split_result = re.split('(\}|!|\#)', line,1, re.M) + split_result = re.split(r'(\}|!|\#)', line,1, re.M) line = split_result[0] sep = split_result[1] if len(split_result)>1 else None if sep == '}': @@ -3514,8 +3516,8 @@ def pass_in_difficult_integration_mode(self, rate=1): text = open(conf_path).read() min_evt, max_evt = 2500 *(2+rate), 10000*(rate+1) - text = re.sub('''\(min_events = \d+\)''', '(min_events = %i )' % min_evt, text) - text = re.sub('''\(max_events = \d+\)''', '(max_events = %i )' % max_evt, text) + text = re.sub(r'''\(min_events = \d+\)''', '(min_events = %i )' % min_evt, text) + text = re.sub(r'''\(max_events = \d+\)''', '(max_events = %i )' % max_evt, text) fsock = open(conf_path, 'w') fsock.write(text) fsock.close() @@ -3619,7 +3621,7 @@ def do_refine(self, line): alljobs = misc.glob('ajob*', Pdir) #remove associated results.dat (ensure to not mix with all data) - Gre = re.compile("\s*j=(G[\d\.\w]+)") + Gre = re.compile(r"\s*j=(G[\d\.\w]+)") for job in alljobs: Gdirs = Gre.findall(open(job).read()) for Gdir in Gdirs: @@ -3727,58 +3729,126 @@ def do_combine_events(self, line): sum_xsec, sum_xerru, sum_axsec = 0,[],0 Gdirs = self.get_Gdir() Gdirs.sort() - for Gdir in Gdirs: - if os.path.exists(pjoin(Gdir, 'events.lhe')): - result = sum_html.OneResult('') - result.read_results(pjoin(Gdir, 'results.dat')) - sum_xsec += result.get('xsec') - sum_xerru.append(result.get('xerru')) - sum_axsec += result.get('axsec') - - if self.run_card['gridpack'] or self.run_card['nevents']==0: - os.remove(pjoin(Gdir, 'events.lhe')) - continue + partials_info = [] + try: + p = subprocess.Popen(["ulimit", "-n"], stdout=subprocess.PIPE) + out, err = p.communicate() + max_G = out.decode() + if max_G == "unlimited": + max_G =2500 + else: + max_G = int(max_G) - 40 + except Exception as error: + logger.debug(error) + max_G = 80 # max(20, len(Gdirs)/self.options['nb_core']) - AllEvent.add(pjoin(Gdir, 'events.lhe'), - result.get('xsec'), - result.get('xerru'), - result.get('axsec') - ) - - if len(AllEvent) >= 80: #perform a partial unweighting - AllEvent.unweight(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % partials), - 
get_wgt, log_level=5, trunc_error=1e-2, event_target=self.run_card['nevents']) - AllEvent = lhe_parser.MultiEventFile() - AllEvent.banner = self.banner - AllEvent.add(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % partials), - sum_xsec, - math.sqrt(sum(x**2 for x in sum_xerru)), - sum_axsec) - partials +=1 - if not hasattr(self,'proc_characteristic'): self.proc_characteristic = self.get_characteristics() - if len(AllEvent) == 0: - nb_event = 0 - else: + mycluster = cluster.MultiCore(nb_core=self.options['nb_core']) + + def split(a, n): + """split a list "a" into n chunk of same size (or nearly same size)""" + k, m = divmod(len(a), n) + return (a[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n)) + + partials_info = [] + if len(Gdirs) >= max_G: + start_unweight= time.perf_counter() + # first check in how many chunk we have to split (always use a multiple of nb_core) + nb_split = 1 + nb_G = len(Gdirs) // (2* self.options['nb_core']) + while nb_G > min(80, max_G): + nb_split += 1 + nb_G = len(Gdirs)//(nb_split*2*self.options['nb_core']) + if nb_G < 10: + nb_split -=1 + nb_G = len(Gdirs)//(nb_split*2*self.options['nb_core']) + + #enforce at least 10 directory per thread + if nb_G > 10 or nb_split>1: + # do the unweighting of each chunk on their own thread + nb_chunk = (nb_split*2*self.options['nb_core']) + else: + nb_chunk = len(Gdirs) // 10 + nb_G =10 + + # security that the number of combine events is too large + if nb_chunk >= max_G: + nb_chunk = max_G -1 + nb_G = len(Gdirs) // nb_chunk + + for i, local_G in enumerate(split(Gdirs, nb_chunk)): + line = [pjoin(self.me_dir, "Events", self.run_name, "partials%d.lhe.gz" % i)] + line.append(pjoin(self.me_dir, 'Events', self.run_name, '%s_%s_banner.txt' % (self.run_name, tag))) + line.append(str(self.results.current['cross'])) + line += local_G + partials_info.append(self.do_combine_events_partial(' '.join(line), preprocess_only=True)) + mycluster.submit(sys.executable, + [pjoin(self.me_dir, 'bin', 'internal', 'madevent_interface.py'), 'combine_events_partial'] + line, + stdout='/dev/null' + ) + + starttime = time.time() + update_status = lambda idle, run, finish: \ + self.update_status((idle, run, finish, 'unweight'), level=None, + force=False, starttime=starttime) + mycluster.wait(self.me_dir, update_status) + # do the final combination + for data in partials_info: + AllEvent.add(*data) + + start_unweight= time.perf_counter() nb_event = AllEvent.unweight(pjoin(self.me_dir, "Events", self.run_name, "unweighted_events.lhe.gz"), get_wgt, trunc_error=1e-2, event_target=self.run_card['nevents'], log_level=logging.DEBUG, normalization=self.run_card['event_norm'], proc_charac=self.proc_characteristic) + + #cleaning + for data in partials_info: + path = data[0] + try: + os.remove(path) + except Exception as error: + try: + os.remove(path[:-3]) # try without the .gz + except: + misc.sprint('no file ', path, 'to clean') + else: + for Gdir in Gdirs: + if os.path.exists(pjoin(Gdir, 'events.lhe')): + result = sum_html.OneResult('') + result.read_results(pjoin(Gdir, 'results.dat')) + sum_xsec += result.get('xsec') + sum_xerru.append(result.get('xerru')) + sum_axsec += result.get('axsec') + + if self.run_card['gridpack'] or self.run_card['nevents']==0: + os.remove(pjoin(Gdir, 'events.lhe')) + continue + + AllEvent.add(pjoin(Gdir, 'events.lhe'), + result.get('xsec'), + result.get('xerru'), + result.get('axsec') + ) + + if len(AllEvent) == 0: + nb_event = 0 + else: + nb_event = AllEvent.unweight(pjoin(self.me_dir, "Events", self.run_name, 
"unweighted_events.lhe.gz"), + get_wgt, trunc_error=1e-2, event_target=self.run_card['nevents'], + log_level=logging.DEBUG, normalization=self.run_card['event_norm'], + proc_charac=self.proc_characteristic) + + if nb_event < self.run_card['nevents']: + logger.warning("failed to generate enough events. Please follow one of the following suggestions to fix the issue:") + logger.warning(" - set in the run_card.dat 'sde_strategy' to %s", 1 + self.run_card['sde_strategy'] % 2) + logger.warning(" - set in the run_card.dat 'hard_survey' to 1 or 2.") + logger.warning(" - reduce the number of requested events (if set too high)") + logger.warning(" - check that you do not have -integrable- singularity in your amplitude.") + logger.warning(" - regenerate your process directory by selecting another gauge (in particular try FD gauge).") - if nb_event < self.run_card['nevents']: - logger.warning("failed to generate enough events. Please follow one of the following suggestions to fix the issue:") - logger.warning(" - set in the run_card.dat 'sde_strategy' to %s", 1 + self.run_card['sde_strategy'] % 2) - logger.warning(" - set in the run_card.dat 'hard_survey' to 1 or 2.") - logger.warning(" - reduce the number of requested events (if set too high)") - logger.warning(" - check that you do not have -integrable- singularity in your amplitude.") - if partials: - for i in range(partials): - try: - os.remove(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % i)) - except Exception: - os.remove(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe" % i)) self.results.add_detail('nb_event', nb_event) @@ -3789,7 +3859,50 @@ def do_combine_events(self, line): logger.info("combination of events done in %s s ", time.time()-start) self.to_store.append('event') + + ############################################################################ + def do_combine_events_partial(self, line, preprocess_only=False): + """ """ + + AllEvent = lhe_parser.MultiEventFile() + + sum_xsec, sum_xerru, sum_axsec = 0,[],0 + output, banner_path,cross = line.split()[:3] + Gdirs = line.split()[3:] + + cross = float(cross) + if not self.banner: + self.banner = banner_mod.Banner(banner_path) + if not hasattr(self, 'run_card'): + self.run_card = banner_mod.RunCard(self.banner['mgruncard']) + AllEvent.banner = self.banner + + for Gdir in Gdirs: + if os.path.exists(pjoin(Gdir, 'events.lhe')): + result = sum_html.OneResult('') + result.read_results(pjoin(Gdir, 'results.dat')) + sum_xsec += result.get('xsec') + sum_xerru.append(result.get('xerru')) + sum_axsec += result.get('axsec') + + if self.run_card['gridpack'] or self.run_card['nevents']==0: + os.remove(pjoin(Gdir, 'events.lhe')) + continue + if not preprocess_only: + AllEvent.add(pjoin(Gdir, 'events.lhe'), + result.get('xsec'), + result.get('xerru'), + result.get('axsec') + ) + if preprocess_only: + return output, sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), sum_axsec + nb_event = max(min(abs(1.01*self.run_card['nevents']*sum_axsec/cross),self.run_card['nevents']), 10) + get_wgt = lambda event: event.wgt + AllEvent.unweight(output, + get_wgt, log_level=5, trunc_error=1e-2, event_target=nb_event) + return output, sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), sum_axsec + ############################################################################ def correct_bias(self): """check the first event and correct the weight by the bias @@ -3902,13 +4015,19 @@ def do_store_events(self, line): #except Exception: # continue # Store log - try: - if 
os.path.exists(pjoin(G_path, 'log.txt')): - input = pjoin(G_path, 'log.txt') + input = pjoin(G_path, 'log.txt') + if os.path.exists(input): + if self.run_card['keep_log'] not in ["none", "minimal"]: output = pjoin(G_path, '%s_log.txt' % run) - files.mv(input, output) - except Exception: - continue + try: + files.mv(input, output) + except Exception: + continue + elif self.run_card['keep_log'] == "none": + try: + os.remove(input) + except Exception: + continue #try: # # Grid # for name in ['ftn26']: @@ -3989,7 +4108,7 @@ def do_create_gridpack(self, line): misc.call(['./bin/internal/make_gridpack'], cwd=self.me_dir) files.mv(pjoin(self.me_dir, 'gridpack.tar.gz'), pjoin(self.me_dir, '%s_gridpack.tar.gz' % self.run_name)) - os.system("sed -i.bak \"s/\s*.true.*=.*GridRun/ .false. = GridRun/g\" %s/Cards/grid_card.dat" \ + os.system("sed -i.bak \"s/\\s*.true.*=.*GridRun/ .false. = GridRun/g\" %s/Cards/grid_card.dat" \ % self.me_dir) self.update_status('gridpack created', level='gridpack') @@ -4476,7 +4595,7 @@ def do_pythia8(self, line): else: preamble = misc.get_HEPTools_location_setter( pjoin(MG5DIR,'HEPTools'),'lib') - preamble += "\n unset PYTHIA8DATA\n" + #preamble += "\n unset PYTHIA8DATA\n" open(pythia_cmd_card,'w').write("""! ! It is possible to run this card manually with: @@ -4691,7 +4810,7 @@ def do_pythia8(self, line): # Make sure to sure the number of split_events determined during the splitting. split_PY8_Card.systemSet('Main:numberOfEvents',partition_for_PY8[i]) split_PY8_Card.systemSet('HEPMCoutput:scaling',split_PY8_Card['HEPMCoutput:scaling']* - (float(partition_for_PY8[i])/float(n_events))) + (float(partition_for_PY8[i]))) # Add_missing set to False so as to be sure not to add any additional parameter w.r.t # the ones in the original PY8 param_card copied. split_PY8_Card.write(pjoin(parallelization_dir,'PY8Card_%d.dat'%i), @@ -4963,9 +5082,9 @@ def wait_monitoring(Idle, Running, Done): if cross_sections: # Filter the cross_sections specified an keep only the ones # with central parameters and a different merging scale - a_float_re = '[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' + a_float_re = r'[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' central_merging_re = re.compile( - '^\s*Weight_MERGING\s*=\s*(?P%s)\s*$'%a_float_re, + r'^\s*Weight_MERGING\s*=\s*(?P%s)\s*$'%a_float_re, re.IGNORECASE) cross_sections = dict( (float(central_merging_re.match(xsec).group('merging')),value) @@ -5016,8 +5135,8 @@ def wait_monitoring(Idle, Running, Done): def parse_PY8_log_file(self, log_file_path): """ Parse a log file to extract number of event and cross-section. 
""" - pythiare = re.compile("Les Houches User Process\(es\)\s*\d+\s*\|\s*(?P\d+)\s*(?P\d+)\s*(?P\d+)\s*\|\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") - pythia_xsec_re = re.compile("Inclusive cross section\s*:\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") + pythiare = re.compile(r"Les Houches User Process\(es\)\s*\d+\s*\|\s*(?P\d+)\s*(?P\d+)\s*(?P\d+)\s*\|\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") + pythia_xsec_re = re.compile(r"Inclusive cross section\s*:\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") sigma_m, Nacc, Ntry = None, None, None for line in misc.BackRead(log_file_path): info = pythiare.search(line) @@ -5158,7 +5277,7 @@ def do_pythia(self, line): # read the line from the bottom of the file #pythia_log = misc.BackRead(pjoin(self.me_dir,'Events', self.run_name, # '%s_pythia.log' % tag)) - pythiare = re.compile("\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") + pythiare = re.compile(r"\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") for line in misc.reverse_readline(pjoin(self.me_dir,'Events', self.run_name, '%s_pythia.log' % tag)): info = pythiare.search(line) @@ -5619,8 +5738,8 @@ def launch_job(self,exe, cwd=None, stdout=None, argument = [], remaining=0, input_files.append(self.get_pdf_input_filename()) #Find the correct ajob - Gre = re.compile("\s*j=(G[\d\.\w]+)") - origre = re.compile("grid_directory=(G[\d\.\w]+)") + Gre = re.compile(r"\s*j=(G[\d\.\w]+)") + origre = re.compile(r"grid_directory=(G[\d\.\w]+)") try : fsock = open(exe) except Exception: @@ -5628,7 +5747,7 @@ def launch_job(self,exe, cwd=None, stdout=None, argument = [], remaining=0, text = fsock.read() output_files = Gre.findall(text) if not output_files: - Ire = re.compile("for i in ([\d\.\s]*) ; do") + Ire = re.compile(r"for i in ([\d\.\s]*) ; do") data = Ire.findall(text) data = ' '.join(data).split() for nb in data: @@ -6035,7 +6154,7 @@ def get_last_tag(self, level): 'syscalc':[], 'rivet':['rivet']} - if name == self.run_name: + if name and name == self.run_name: if reload_card: run_card = pjoin(self.me_dir, 'Cards','run_card.dat') self.run_card = banner_mod.RunCard(run_card) @@ -6334,7 +6453,7 @@ def run_syscalc(self, mode='parton', event_path=None, output=None): elif mode == 'Pythia': stdout = open(pjoin(event_dir, self.run_name, '%s_%s_syscalc.log' % (tag,mode)),'w') if 'mgpythiacard' in self.banner: - pat = re.compile('''^\s*qcut\s*=\s*([\+\-\d.e]*)''', re.M+re.I) + pat = re.compile(r'''^\s*qcut\s*=\s*([\+\-\d.e]*)''', re.M+re.I) data = pat.search(self.banner['mgpythiacard']) if data: qcut = float(data.group(1)) @@ -6611,7 +6730,7 @@ def get_subP_ids(path): for line in open(pjoin(path, 'leshouche.inc')): if not 'IDUP' in line: continue - particles = re.search("/([\d,-]+)/", line) + particles = re.search(r"/([\d,-]+)/", line) all_ids.append([int(p) for p in particles.group(1).split(',')]) return all_ids @@ -6899,6 +7018,7 @@ def do_combine_events(self, line): partials = 0 # if too many file make some partial unweighting sum_xsec, sum_xerru, sum_axsec = 0,[],0 + partials_info = [] Gdirs = self.get_Gdir() Gdirs.sort() for Gdir in Gdirs: @@ -6917,16 +7037,21 @@ def do_combine_events(self, line): sum_axsec += result.get('axsec')*gscalefact[Gdir] if len(AllEvent) >= 80: #perform a partial unweighting + nb_event = min(abs(1.01*self.nb_event*sum_axsec/self.results.current['cross']),self.run_card['nevents']) AllEvent.unweight(pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), - get_wgt, log_level=5, trunc_error=1e-2, event_target=self.nb_event) + 
get_wgt, log_level=5, trunc_error=1e-2, event_target=nb_event) AllEvent = lhe_parser.MultiEventFile() AllEvent.banner = self.banner - AllEvent.add(pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), + partials_info.append((pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), - sum_axsec) + sum_axsec) ) + sum_xsec, sum_xerru, sum_axsec = 0,[],0 partials +=1 + for data in partials_info: + AllEvent.add(*data) + if not hasattr(self,'proc_characteristic'): self.proc_characteristic = self.get_characteristics() diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/misc.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/misc.py index c4c669f36b..e7fd60be0d 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/misc.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/misc.py @@ -67,7 +67,7 @@ def parse_info_str(fsock): """ info_dict = {} - pattern = re.compile("(?P\w*)\s*=\s*(?P.*)", + pattern = re.compile(r"(?P\w*)\s*=\s*(?P.*)", re.IGNORECASE | re.VERBOSE) for entry in fsock: entry = entry.strip() @@ -84,7 +84,7 @@ def parse_info_str(fsock): def glob(name, path=''): """call to glob.glob with automatic security on path""" import glob as glob_module - path = re.sub('(?P\?|\*|\[|\])', '[\g]', path) + path = re.sub(r'(?P\?|\*|\[|\])', r'[\g]', path) return glob_module.glob(pjoin(path, name)) #=============================================================================== @@ -614,10 +614,10 @@ def mod_compilator(directory, new='gfortran', current=None, compiler_type='gfort #search file file_to_change=find_makefile_in_dir(directory) if compiler_type == 'gfortran': - comp_re = re.compile('^(\s*)FC\s*=\s*(.+)\s*$') + comp_re = re.compile(r'^(\s*)FC\s*=\s*(.+)\s*$') var = 'FC' elif compiler_type == 'cpp': - comp_re = re.compile('^(\s*)CXX\s*=\s*(.+)\s*$') + comp_re = re.compile(r'^(\s*)CXX\s*=\s*(.+)\s*$') var = 'CXX' else: MadGraph5Error, 'Unknown compiler type: %s' % compiler_type @@ -861,9 +861,9 @@ def detect_current_compiler(path, compiler_type='fortran'): # comp = re.compile("^\s*FC\s*=\s*(\w+)\s*") # The regular expression below allows for compiler definition with absolute path if compiler_type == 'fortran': - comp = re.compile("^\s*FC\s*=\s*([\w\/\\.\-]+)\s*") + comp = re.compile("^\\s*FC\\s*=\\s*([\\w\\/\\.\\-]+)\\s*") elif compiler_type == 'cpp': - comp = re.compile("^\s*CXX\s*=\s*([\w\/\\.\-]+)\s*") + comp = re.compile("^\\s*CXX\\s*=\\s*([\\w\\/\\.\\-]+)\\s*") else: MadGraph5Error, 'Unknown compiler type: %s' % compiler_type @@ -1001,7 +1001,19 @@ def call_stdout(arg, *args, **opt): def copytree(src, dst, symlinks = False, ignore = None): if not os.path.exists(dst): os.makedirs(dst) - shutil.copystat(src, dst) + try: + shutil.copystat(src, dst) + except PermissionError: + if os.path.realpath(src).startswith('/cvmfs') and os.path.realpath(dst).startswith('/afs'): + # allowing missmatch from cvmfs to afs since sounds to not create issue --at least in general-- + logger.critical(f'Ignoring that we could not copy permissions from {src} to {dst}') + else: + logger.critical(f'Permission error detected from {src} to {dst}.\n'+\ + 'If you are using WSL with windows partition, please try using python3.12\n'+\ + 'or avoid moving your data from the WSL partition to the UNIX one') + # we do not have enough experience in WSL to allow it to get trough. + raise + lst = os.listdir(src) if ignore: excl = ignore(src, lst) @@ -1895,12 +1907,12 @@ class EasterEgg(object): May4_banner = "* _____ *\n" + \ "* ,-~\" \"~-. *\n" + \ "* * ,^ ___ ^. 
* *\n" + \ - "* * / .^ ^. \ * *\n" + \ + "* * / .^ ^. \\ * *\n" + \ "* * Y l o ! Y * *\n" + \ "* * l_ `.___.' _,[ * *\n" + \ "* * |^~\"--------------~\"\"^| * *\n" + \ "* * ! May the 4th ! * *\n" + \ - "* * \ / * *\n" + \ + "* * \\ / * *\n" + \ "* * ^. .^ * *\n" + \ "* * \"-.._____.,-\" * *\n" @@ -1909,13 +1921,13 @@ class EasterEgg(object): "* M::::::::::M M::::::::::M *\n" + \ "* M:::::::::::M M:::::::::::M (_)___ *\n" + \ "* M:::::::M::::M M::::M:::::::M | / __| *\n" + \ - "* M::::::M M::::M M::::M M::::::M | \__ \ *\n" + \ + "* M::::::M M::::M M::::M M::::::M | \\__ \\ *\n" + \ "* M::::::M M::::M::::M M::::::M |_|___/ *\n" + \ "* M::::::M M:::::::M M::::::M *\n" + \ "* M::::::M M:::::M M::::::M / _| ___ _ __ *\n" + \ - "* M::::::M MMMMM M::::::M | |_ / _ \| '__| *\n" + \ + "* M::::::M MMMMM M::::::M | |_ / _ \\| '__| *\n" + \ "* M::::::M M::::::M | _| (_) | | *\n" + \ - "* M::::::M M::::::M |_/\/\___/|_| *\n" + \ + "* M::::::M M::::::M |_/\\/\\___/|_| *\n" + \ "* M::::::M M::::::M *\n" + \ "* MMMMMMMM MMMMMMMM *\n" + \ "* *\n" + \ @@ -2233,39 +2245,51 @@ def import_python_lhapdf(lhapdfconfig): os.environ['LD_LIBRARY_PATH'] = lhapdf_libdir else: os.environ['LD_LIBRARY_PATH'] = '%s:%s' %(lhapdf_libdir,os.environ['LD_LIBRARY_PATH']) - try: candidates=[dirname for dirname in os.listdir(lhapdf_libdir) \ if os.path.isdir(os.path.join(lhapdf_libdir,dirname))] except OSError: candidates=[] + if os.path.isdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')): + candidates += [pjoin(os.pardir,'local', 'lib', dirname) for dirname in os.listdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')) + if os.path.isdir(os.path.join(lhapdf_libdir,os.pardir, 'local', 'lib', dirname))] for candidate in candidates: - if os.path.isdir(os.path.join(lhapdf_libdir,candidate,'site-packages')): - sys.path.insert(0,os.path.join(lhapdf_libdir,candidate,'site-packages')) - try: - import lhapdf - use_lhapdf=True - break - except ImportError: - sys.path.pop(0) - continue + for subdir in ['site-packages', 'dist-packages']: + if os.path.isdir(os.path.join(lhapdf_libdir,candidate, subdir)): + sys.path.insert(0,os.path.join(lhapdf_libdir,candidate, subdir)) + try: + import lhapdf + use_lhapdf=True + break + except ImportError as error: + sys.path.pop(0) + continue + else: + continue + break if not use_lhapdf: try: candidates=[dirname for dirname in os.listdir(lhapdf_libdir+'64') \ if os.path.isdir(os.path.join(lhapdf_libdir+'64',dirname))] except OSError: candidates=[] - + if os.path.isdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib64')): + candidates += [pjoin(os.pardir,'local', 'lib64', dirname) for dirname in os.listdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')) + if os.path.isdir(os.path.join(lhapdf_libdir,os.pardir, 'local', 'lib64', dirname))] for candidate in candidates: - if os.path.isdir(os.path.join(lhapdf_libdir+'64',candidate,'site-packages')): - sys.path.insert(0,os.path.join(lhapdf_libdir+'64',candidate,'site-packages')) - try: - import lhapdf - use_lhapdf=True - break - except ImportError: - sys.path.pop(0) - continue + for subdir in ['site-packages', 'dist-packages']: + if os.path.isdir(os.path.join(lhapdf_libdir+'64',candidate, subdir)): + sys.path.insert(0,os.path.join(lhapdf_libdir+'64',candidate, subdir)) + try: + import lhapdf + use_lhapdf=True + break + except ImportError as error: + sys.path.pop(0) + continue + else: + continue + break if not use_lhapdf: try: import lhapdf diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/shower_card.py 
b/epochX/cudacpp/gg_ttgg.mad/bin/internal/shower_card.py index e87d534177..16ed72b1a0 100755 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/shower_card.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/shower_card.py @@ -288,7 +288,7 @@ def write(self, output_file, template=None, python_template=False, self.text = open(template,'r').read() - key_re = re.compile('^(\s*)([\S^#]+)(\s*)=(\s*)([^#]*?)(\s*)(\#.*|$)') + key_re = re.compile(r'^(\s*)([\S^#]+)(\s*)=(\s*)([^#]*?)(\s*)(\#.*|$)') newlines = [] for line in self.text.split('\n'): key_match = key_re.findall(line) diff --git a/epochX/cudacpp/gg_ttgg.mad/bin/internal/systematics.py b/epochX/cudacpp/gg_ttgg.mad/bin/internal/systematics.py index 28eaed00e2..2acebd087c 100644 --- a/epochX/cudacpp/gg_ttgg.mad/bin/internal/systematics.py +++ b/epochX/cudacpp/gg_ttgg.mad/bin/internal/systematics.py @@ -582,7 +582,7 @@ def print_cross_sections(self, all_cross, nb_event, stdout): else: resume.write( '#PDF %s: %g +%2.3g%% -%2.3g%%\n' % (pdfset.name, pdferr.central,pdferr.errplus*100/all_cross[0], pdferr.errminus*100/all_cross[0])) - dyn_name = {1: '\sum ET', 2:'\sum\sqrt{m^2+pt^2}', 3:'0.5 \sum\sqrt{m^2+pt^2}',4:'\sqrt{\hat s}' } + dyn_name = {1: r'\sum ET', 2:r'\sum\sqrt{m^2+pt^2}', 3:r'0.5 \sum\sqrt{m^2+pt^2}',4:r'\sqrt{\hat s}' } for key, curr in dyns.items(): if key ==-1: continue @@ -789,7 +789,7 @@ def get_id(self): return int(self.start_wgt_id) if 'initrwgt' in self.banner: - pattern = re.compile('
diff --git a/epochX/cudacpp/gg_ttgg.mad/CODEGEN_cudacpp_gg_ttgg_log.txt b/epochX/cudacpp/gg_ttgg.mad/CODEGEN_cudacpp_gg_ttgg_log.txt MG5_aMC> set lhapdf /PATH/TO/lhapdf-config -None does not seem to correspond to a valid lhapdf-config executable. -Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). -Note that you can still compile and run aMC@NLO with the built-in PDFs - MG5_aMC> set lhapdf /PATH/TO/lhapdf-config - Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +57,7 @@ generate g g > t t~ g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005625247955322266  +DEBUG: model prefixing takes 0.005681276321411133  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,17 +150,16 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Process has 123 diagrams -1 processes with 123 diagrams generated in 0.160 s +1 processes with 123 diagrams generated in 0.161 s Total: 1 processes with 123 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttgg Load PLUGIN.CUDACPP_OUTPUT -Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.5.3_lo_vect. +Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
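The recurring `re.compile(...)` -> `re.compile(r...)` changes in the Python hunks above (common_run_interface, misc.py, shower_card.py, systematics.py) all address the same issue: CPython treats unrecognized escape sequences such as `\s` or `\d` in plain string literals as deprecated (a DeprecationWarning since Python 3.6, upgraded to SyntaxWarning in 3.12), so every regex literal is being respelled as a raw string. A minimal standalone illustration (not MG5aMC code itself):

```python
import re

# A plain literal like "\s*j=(G[\d\.\w]+)" relies on Python passing unknown
# escapes through unchanged -- deprecated since Python 3.6 and a
# SyntaxWarning since 3.12. A raw string keeps the backslashes literal:
Gre = re.compile(r"\s*j=(G[\d\.\w]+)")

assert Gre.match("  j=G123.x").group(1) == "G123.x"
```

The compiled pattern is identical either way, which is why these hunks are behaviour-preserving: only the spelling of the literal changes.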
It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT -DEBUG: cformat =  plugin [export_cpp.py at line 3070]  DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 165]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ g g @1 @@ -174,18 +168,18 @@ INFO: Processing color information for process: g g > t t~ g g @1 DEBUG: type(fortran_model)= [output.py at line 214]  DEBUG: type(me)= me=0 [output.py at line 215]  DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 216]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. -Generated helas calls for 1 subprocesses (123 diagrams) in 0.427 s +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/SubProcesses/P1_Sigma_sm_gg_ttxgg/. 
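Stepping back to the `do_combine_events` hunk at the top of this section: the patch defers registration of the `partials*.lhe.gz` files. Each batch of roughly 80 channel files is unweighted into one partial file whose `(xsec, err, axsec)` sums are recorded in `partials_info`, the running sums are reset per batch, and all partial files are re-added to the fresh `MultiEventFile` only after the loop; the per-batch `event_target` is also rescaled by the batch's share of `axsec`. A simplified sketch of that bookkeeping, with a hypothetical `unweight_batch` callable standing in for `AllEvent.unweight`:

```python
def combine_in_batches(channel_results, unweight_batch, batch_size=80):
    """Unweight channels in batches; register partial files only at the end.

    channel_results: iterable of (path, xsec, xerr, axsec), one per channel.
    unweight_batch: writes one partial file for a batch, returns its path.
    """
    batch, partials_info = [], []
    sum_xsec, sum_xerru, sum_axsec = 0.0, [], 0.0
    for path, xsec, xerr, axsec in channel_results:
        batch.append(path)
        sum_xsec += xsec
        sum_xerru.append(xerr)
        sum_axsec += axsec
        if len(batch) >= batch_size:
            partial = unweight_batch(batch)
            # remember this partial file with the sums of *its* batch only
            partials_info.append(
                (partial, sum_xsec, sum(x**2 for x in sum_xerru) ** 0.5, sum_axsec))
            batch = []
            sum_xsec, sum_xerru, sum_axsec = 0.0, [], 0.0  # reset per batch
    # leftover channels stay in the in-memory event file, as in the patch
    return partials_info, batch
```

Resetting the accumulators per batch looks like the substantive fix here: the old code attached the cumulative sums of all earlier batches to each successive partial file.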
+Generated helas calls for 1 subprocesses (123 diagrams) in 0.436 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.321 s +ALOHA: aloha creates 5 routines in 0.323 s VVV1 VVV1 FFV1 @@ -198,17 +192,17 @@ ALOHA: aloha creates 5 routines in 0.321 s VVVV3 VVVV4 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttgg/src/. quit -real 0m1.534s -user 0m1.381s -sys 0m0.064s -Code generation completed in 2 seconds +real 0m1.468s +user 0m1.395s +sys 0m0.062s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt index f6104016a5..2b142b4e41 100644 --- a/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt +++ b/epochX/cudacpp/gg_ttggg.mad/CODEGEN_mad_gg_ttggg_log.txt @@ -14,7 +14,7 @@ Running MG5 in debug mode * * * * * * * * * * * * -* VERSION 3.5.3_lo_vect 2023-12-23 * +* VERSION 3.6.0_lo_vect 2024-06-17 * * * * WARNING: UNKNOWN DEVELOPMENT VERSION. * * WARNING: DO NOT USE FOR PRODUCTION * @@ -45,15 +45,10 @@ Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (inclu Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config -None does not seem to correspond to a valid lhapdf-config executable. -Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). -Note that you can still compile and run aMC@NLO with the built-in PDFs - MG5_aMC> set lhapdf /PATH/TO/lhapdf-config - Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +57,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005656003952026367  +DEBUG: model prefixing takes 0.005638599395751953  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,24 +150,24 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.910 s +1 processes with 1240 diagrams generated in 1.917 s Total: 1 processes with 1240 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gg_ttggg --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT -Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.5.3_lo_vect. +Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT +DEBUG: opt['output_options']['vector_size'] =  32 [export_v4.py at line 4334]  Output will be done with PLUGIN: CUDACPP_OUTPUT -DEBUG: cformat =  standalone_simd [export_cpp.py at line 3070]  DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 165]  INFO: initialize a new directory: CODEGEN_mad_gg_ttggg INFO: remove old information in CODEGEN_mad_gg_ttggg DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Processing color information for process: g g > t t~ g g g @1 @@ -184,32 +179,28 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
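Also from the misc.py hunk above: `copytree` no longer dies outright when `shutil.copystat` hits a `PermissionError`; it tolerates exactly the cvmfs-to-afs case and re-raises otherwise with a WSL hint. A reduced sketch of that policy (same structure, generic logger):

```python
import logging
import os
import shutil

logger = logging.getLogger(__name__)

def copystat_tolerant(src, dst):
    """Copy permission bits, tolerating only cvmfs -> afs mismatches."""
    try:
        shutil.copystat(src, dst)
    except PermissionError:
        real_src, real_dst = os.path.realpath(src), os.path.realpath(dst)
        if real_src.startswith('/cvmfs') and real_dst.startswith('/afs'):
            # afs cannot always accept cvmfs permission bits; the file data
            # is already copied, so log loudly and carry on.
            logger.critical('Ignoring that we could not copy permissions '
                            'from %s to %s', src, dst)
        else:
            # e.g. WSL users moving data across the Windows/UNIX boundary
            raise
```

The `realpath`-based prefix test is the key design choice: symlinks are resolved before the whitelist check, so a link into /cvmfs is treated the same as a direct path.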
-DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2, 0, 3, 4, 0, 5, 6, 0, 0, 0, 0, 0, 7, 8, 9, 0, 10, 11, 12, 0, 13, 14, 15, 0, 16, 17, 18, 19, 20, 21, 0, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 0, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 0, 67, 68, 69, 70, 71, 72, 73, 74, 75, 0, 76, 77, 78, 79, 80, 81, 82, 83, 84, 0, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 0, 0, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 0, 121, 122, 0, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 0, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 0, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 0, 197, 198, 199, 200, 201, 202, 0, 203, 204, 205, 206, 207, 208, 0, 209, 210, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 0, 226, 227, 0, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 0, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 0, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 0, 302, 303, 304, 305, 306, 307, 0, 308, 309, 310, 311, 312, 313, 0, 314, 315, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 316, 317, 318, 319, 320, 321, 0, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 0, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 0, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 0, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 0, 378, 379, 0, 380, 381, 0, 0, 0, 0, 0, 382, 383, 384, 385, 386, 387, 388, 389, 390, 0, 391, 392, 393, 394, 395, 396, 397, 398, 399, 0, 400, 401, 402, 403, 404, 405, 406, 407, 408, 0, 409, 410, 411, 412, 413, 414, 0, 415, 416, 417, 418, 419, 420, 0, 0, 0, 421, 422, 423, 424, 425, 426, 0, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 0, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 0, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 0, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 0, 483, 484, 0, 485, 486, 0, 0, 0, 0, 0, 487, 488, 489, 490, 491, 492, 493, 494, 495, 0, 496, 497, 498, 499, 500, 501, 502, 503, 504, 0, 505, 506, 507, 508, 509, 510, 511, 512, 513, 0, 514, 515, 516, 517, 518, 519, 0, 520, 521, 522, 523, 524, 525, 0, 0, 0, 526, 527, 528, 529, 530, 531, 0, 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 546, 0, 547, 548, 549, 550, 551, 552, 553, 554, 555, 556, 557, 558, 559, 560, 561, 0, 562, 563, 564, 565, 566, 567, 568, 569, 570, 571, 572, 573, 574, 575, 576, 0, 577, 578, 579, 580, 581, 582, 583, 584, 585, 586, 587, 0, 588, 589, 0, 590, 591, 0, 0, 0, 0, 0, 592, 593, 594, 595, 596, 597, 598, 599, 600, 0, 601, 602, 603, 604, 605, 606, 607, 608, 609, 0, 610, 611, 612, 613, 614, 615, 616, 617, 618, 0, 619, 620, 621, 622, 623, 624, 0, 625, 626, 627, 628, 629, 630, 0, 0, 0, 631, 632, 633, 634, 635, 636, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 647, 648, 649, 650, 651, 652, 653, 
654, 655, 656, 657, 658, 659, 660, 661, 662, 663, 0, 664, 665, 666, 667, 668, 669, 0, 670, 671, 672, 673, 674, 675, 0, 0, 0, 676, 677, 678, 679, 680, 681, 682, 683, 684, 685, 686, 687, 688, 689, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 700, 701, 702, 703, 704, 705, 706, 707, 708, 0, 709, 710, 711, 712, 713, 714, 0, 715, 716, 717, 718, 719, 720, 0, 0, 0, 721, 722, 0, 723, 724, 0, 725, 726, 0, 0, 0, 0, 0, 727, 728, 729, 730, 731, 732, 733, 734, 735, 0, 736, 737, 738, 739, 740, 741, 742, 743, 744, 0, 745, 746, 747, 748, 749, 750, 751, 752, 753, 0, 754, 755, 756, 757, 758, 759, 0, 760, 761, 762, 763, 764, 765, 766, 767, 0, 768, 769, 0, 770, 771, 0, 0, 0, 0, 0, 772, 773, 774, 775, 776, 777, 778, 779, 780, 0, 781, 782, 783, 784, 785, 786, 787, 788, 789, 0, 790, 791, 792, 793, 794, 795, 796, 797, 798, 0, 799, 800, 801, 802, 803, 804, 0, 805, 806, 807, 808, 809, 810, 811, 812, 0, 813, 814, 0, 815, 816, 0, 0, 0, 0, 0, 817, 818, 819, 820, 821, 822, 823, 824, 825, 0, 826, 827, 828, 829, 830, 831, 832, 833, 834, 0, 835, 836, 837, 838, 839, 840, 841, 842, 843, 0, 844, 845, 846, 847, 848, 849, 0, 850, 851, 852, 853, 854, 855, 856, 857, 0, 858, 859, 0, 860, 861, 0, 0, 0, 0, 862, 863, 0, 864, 865, 0, 866, 867, 0, 0, 0, 0, 868, 869, 0, 870, 871, 0, 872, 873, 0, 0, 0, 0, 0, 0, 0, 874, 875, 876, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 888, 889, 890, 891, 0, 892, 893, 894, 895, 896, 897, 898, 899, 900, 901, 902, 903, 904, 905, 906, 907, 908, 909, 0, 910, 911, 912, 913, 914, 915, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 0, 928, 929, 930, 931, 932, 933, 0, 934, 935, 936, 937, 938, 939, 0, 940, 941, 942, 943, 944, 945, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at line 711]  -DEBUG: subproc_number =  0 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxggg -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses/P1_gg_ttxggg [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  945 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 4, 4: 5, 5: 7, 6: 8, 7: 14, 8: 15, 9: 16, 10: 18, 11: 19, 12: 20, 13: 22, 14: 23, 15: 24, 16: 26, 17: 27, 18: 28, 19: 29, 20: 30, 21: 31, 22: 33, 23: 34, 24: 35, 25: 36, 26: 37, 27: 38, 28: 39, 29: 40, 30: 41, 31: 42, 32: 43, 33: 44, 34: 45, 35: 46, 36: 47, 37: 49, 38: 50, 39: 51, 40: 52, 41: 53, 42: 54, 43: 55, 44: 56, 45: 57, 46: 58, 47: 59, 48: 60, 49: 61, 50: 62, 51: 63, 52: 65, 53: 66, 54: 67, 55: 68, 56: 69, 57: 70, 58: 71, 59: 72, 60: 73, 61: 74, 62: 75, 63: 76, 64: 77, 65: 78, 66: 79, 67: 81, 68: 82, 69: 83, 70: 84, 71: 85, 72: 86, 73: 87, 74: 88, 75: 89, 76: 91, 77: 92, 78: 93, 79: 94, 80: 95, 81: 96, 82: 97, 83: 98, 84: 99, 85: 101, 86: 102, 87: 103, 88: 104, 89: 105, 90: 106, 91: 107, 92: 108, 93: 109, 94: 110, 95: 111, 96: 112, 97: 113, 98: 114, 99: 115, 100: 116, 101: 117, 102: 118, 103: 119, 104: 120, 105: 121, 106: 124, 107: 125, 108: 126, 109: 127, 110: 128, 111: 129, 112: 130, 113: 131, 114: 132, 115: 133, 116: 134, 117: 135, 118: 136, 119: 137, 120: 138, 
121: 140, 122: 141, 123: 143, 124: 144, 125: 145, 126: 146, 127: 147, 128: 148, 129: 149, 130: 150, 131: 151, 132: 152, 133: 153, 134: 154, 135: 155, 136: 156, 137: 157, 138: 159, 139: 160, 140: 161, 141: 162, 142: 163, 143: 164, 144: 165, 145: 166, 146: 167, 147: 168, 148: 169, 149: 170, 150: 171, 151: 172, 152: 173, 153: 175, 154: 176, 155: 177, 156: 178, 157: 179, 158: 180, 159: 181, 160: 182, 161: 183, 162: 184, 163: 185, 164: 186, 165: 187, 166: 188, 167: 189, 168: 190, 169: 191, 170: 192, 171: 193, 172: 194, 173: 195, 174: 196, 175: 197, 176: 198, 177: 199, 178: 200, 179: 201, 180: 202, 181: 203, 182: 204, 183: 205, 184: 206, 185: 207, 186: 208, 187: 209, 188: 210, 189: 211, 190: 212, 191: 213, 192: 214, 193: 215, 194: 216, 195: 217, 196: 218, 197: 220, 198: 221, 199: 222, 200: 223, 201: 224, 202: 225, 203: 227, 204: 228, 205: 229, 206: 230, 207: 231, 208: 232, 209: 234, 210: 235, 211: 247, 212: 248, 213: 249, 214: 250, 215: 251, 216: 252, 217: 253, 218: 254, 219: 255, 220: 256, 221: 257, 222: 258, 223: 259, 224: 260, 225: 261, 226: 263, 227: 264, 228: 266, 229: 267, 230: 268, 231: 269, 232: 270, 233: 271, 234: 272, 235: 273, 236: 274, 237: 275, 238: 276, 239: 277, 240: 278, 241: 279, 242: 280, 243: 282, 244: 283, 245: 284, 246: 285, 247: 286, 248: 287, 249: 288, 250: 289, 251: 290, 252: 291, 253: 292, 254: 293, 255: 294, 256: 295, 257: 296, 258: 298, 259: 299, 260: 300, 261: 301, 262: 302, 263: 303, 264: 304, 265: 305, 266: 306, 267: 307, 268: 308, 269: 309, 270: 310, 271: 311, 272: 312, 273: 313, 274: 314, 275: 315, 276: 316, 277: 317, 278: 318, 279: 319, 280: 320, 281: 321, 282: 322, 283: 323, 284: 324, 285: 325, 286: 326, 287: 327, 288: 328, 289: 329, 290: 330, 291: 331, 292: 332, 293: 333, 294: 334, 295: 335, 296: 336, 297: 337, 298: 338, 299: 339, 300: 340, 301: 341, 302: 343, 303: 344, 304: 345, 305: 346, 306: 347, 307: 348, 308: 350, 309: 351, 310: 352, 311: 353, 312: 354, 313: 355, 314: 357, 315: 358, 316: 370, 317: 371, 318: 372, 319: 373, 320: 374, 321: 375, 322: 377, 323: 378, 324: 379, 325: 380, 326: 381, 327: 382, 328: 383, 329: 384, 330: 385, 331: 386, 332: 387, 333: 388, 334: 389, 335: 390, 336: 391, 337: 393, 338: 394, 339: 395, 340: 396, 341: 397, 342: 398, 343: 399, 344: 400, 345: 401, 346: 402, 347: 403, 348: 404, 349: 405, 350: 406, 351: 407, 352: 409, 353: 410, 354: 411, 355: 412, 356: 413, 357: 414, 358: 415, 359: 416, 360: 417, 361: 418, 362: 419, 363: 420, 364: 421, 365: 422, 366: 423, 367: 425, 368: 426, 369: 427, 370: 428, 371: 429, 372: 430, 373: 431, 374: 432, 375: 433, 376: 434, 377: 435, 378: 437, 379: 438, 380: 440, 381: 441, 382: 447, 383: 448, 384: 449, 385: 450, 386: 451, 387: 452, 388: 453, 389: 454, 390: 455, 391: 457, 392: 458, 393: 459, 394: 460, 395: 461, 396: 462, 397: 463, 398: 464, 399: 465, 400: 467, 401: 468, 402: 469, 403: 470, 404: 471, 405: 472, 406: 473, 407: 474, 408: 475, 409: 477, 410: 478, 411: 479, 412: 480, 413: 481, 414: 482, 415: 484, 416: 485, 417: 486, 418: 487, 419: 488, 420: 489, 421: 493, 422: 494, 423: 495, 424: 496, 425: 497, 426: 498, 427: 500, 428: 501, 429: 502, 430: 503, 431: 504, 432: 505, 433: 506, 434: 507, 435: 508, 436: 509, 437: 510, 438: 511, 439: 512, 440: 513, 441: 514, 442: 516, 443: 517, 444: 518, 445: 519, 446: 520, 447: 521, 448: 522, 449: 523, 450: 524, 451: 525, 452: 526, 453: 527, 454: 528, 455: 529, 456: 530, 457: 532, 458: 533, 459: 534, 460: 535, 461: 536, 462: 537, 463: 538, 464: 539, 465: 540, 466: 541, 467: 542, 468: 543, 469: 544, 470: 545, 471: 546, 472: 548, 473: 549, 474: 550, 475: 551, 476: 
552, 477: 553, 478: 554, 479: 555, 480: 556, 481: 557, 482: 558, 483: 560, 484: 561, 485: 563, 486: 564, 487: 570, 488: 571, 489: 572, 490: 573, 491: 574, 492: 575, 493: 576, 494: 577, 495: 578, 496: 580, 497: 581, 498: 582, 499: 583, 500: 584, 501: 585, 502: 586, 503: 587, 504: 588, 505: 590, 506: 591, 507: 592, 508: 593, 509: 594, 510: 595, 511: 596, 512: 597, 513: 598, 514: 600, 515: 601, 516: 602, 517: 603, 518: 604, 519: 605, 520: 607, 521: 608, 522: 609, 523: 610, 524: 611, 525: 612, 526: 616, 527: 617, 528: 618, 529: 619, 530: 620, 531: 621, 532: 623, 533: 624, 534: 625, 535: 626, 536: 627, 537: 628, 538: 629, 539: 630, 540: 631, 541: 632, 542: 633, 543: 634, 544: 635, 545: 636, 546: 637, 547: 639, 548: 640, 549: 641, 550: 642, 551: 643, 552: 644, 553: 645, 554: 646, 555: 647, 556: 648, 557: 649, 558: 650, 559: 651, 560: 652, 561: 653, 562: 655, 563: 656, 564: 657, 565: 658, 566: 659, 567: 660, 568: 661, 569: 662, 570: 663, 571: 664, 572: 665, 573: 666, 574: 667, 575: 668, 576: 669, 577: 671, 578: 672, 579: 673, 580: 674, 581: 675, 582: 676, 583: 677, 584: 678, 585: 679, 586: 680, 587: 681, 588: 683, 589: 684, 590: 686, 591: 687, 592: 693, 593: 694, 594: 695, 595: 696, 596: 697, 597: 698, 598: 699, 599: 700, 600: 701, 601: 703, 602: 704, 603: 705, 604: 706, 605: 707, 606: 708, 607: 709, 608: 710, 609: 711, 610: 713, 611: 714, 612: 715, 613: 716, 614: 717, 615: 718, 616: 719, 617: 720, 618: 721, 619: 723, 620: 724, 621: 725, 622: 726, 623: 727, 624: 728, 625: 730, 626: 731, 627: 732, 628: 733, 629: 734, 630: 735, 631: 739, 632: 740, 633: 741, 634: 742, 635: 743, 636: 744, 637: 745, 638: 746, 639: 747, 640: 748, 641: 749, 642: 750, 643: 751, 644: 752, 645: 753, 646: 754, 647: 755, 648: 756, 649: 757, 650: 758, 651: 759, 652: 760, 653: 761, 654: 762, 655: 763, 656: 764, 657: 765, 658: 766, 659: 767, 660: 768, 661: 769, 662: 770, 663: 771, 664: 773, 665: 774, 666: 775, 667: 776, 668: 777, 669: 778, 670: 780, 671: 781, 672: 782, 673: 783, 674: 784, 675: 785, 676: 789, 677: 790, 678: 791, 679: 792, 680: 793, 681: 794, 682: 795, 683: 796, 684: 797, 685: 798, 686: 799, 687: 800, 688: 801, 689: 802, 690: 803, 691: 804, 692: 805, 693: 806, 694: 807, 695: 808, 696: 809, 697: 810, 698: 811, 699: 812, 700: 813, 701: 814, 702: 815, 703: 816, 704: 817, 705: 818, 706: 819, 707: 820, 708: 821, 709: 823, 710: 824, 711: 825, 712: 826, 713: 827, 714: 828, 715: 830, 716: 831, 717: 832, 718: 833, 719: 834, 720: 835, 721: 839, 722: 840, 723: 842, 724: 843, 725: 845, 726: 846, 727: 852, 728: 853, 729: 854, 730: 855, 731: 856, 732: 857, 733: 858, 734: 859, 735: 860, 736: 862, 737: 863, 738: 864, 739: 865, 740: 866, 741: 867, 742: 868, 743: 869, 744: 870, 745: 872, 746: 873, 747: 874, 748: 875, 749: 876, 750: 877, 751: 878, 752: 879, 753: 880, 754: 882, 755: 883, 756: 884, 757: 885, 758: 886, 759: 887, 760: 889, 761: 890, 762: 891, 763: 892, 764: 893, 765: 894, 766: 895, 767: 896, 768: 898, 769: 899, 770: 901, 771: 902, 772: 908, 773: 909, 774: 910, 775: 911, 776: 912, 777: 913, 778: 914, 779: 915, 780: 916, 781: 918, 782: 919, 783: 920, 784: 921, 785: 922, 786: 923, 787: 924, 788: 925, 789: 926, 790: 928, 791: 929, 792: 930, 793: 931, 794: 932, 795: 933, 796: 934, 797: 935, 798: 936, 799: 938, 800: 939, 801: 940, 802: 941, 803: 942, 804: 943, 805: 945, 806: 946, 807: 947, 808: 948, 809: 949, 810: 950, 811: 951, 812: 952, 813: 954, 814: 955, 815: 957, 816: 958, 817: 964, 818: 965, 819: 966, 820: 967, 821: 968, 822: 969, 823: 970, 824: 971, 825: 972, 826: 974, 827: 975, 828: 976, 829: 977, 830: 978, 831: 979, 
832: 980, 833: 981, 834: 982, 835: 984, 836: 985, 837: 986, 838: 987, 839: 988, 840: 989, 841: 990, 842: 991, 843: 992, 844: 994, 845: 995, 846: 996, 847: 997, 848: 998, 849: 999, 850: 1001, 851: 1002, 852: 1003, 853: 1004, 854: 1005, 855: 1006, 856: 1007, 857: 1008, 858: 1010, 859: 1011, 860: 1013, 861: 1014, 862: 1019, 863: 1020, 864: 1022, 865: 1023, 866: 1025, 867: 1026, 868: 1031, 869: 1032, 870: 1034, 871: 1035, 872: 1037, 873: 1038, 874: 1046, 875: 1047, 876: 1048, 877: 1049, 878: 1050, 879: 1051, 880: 1052, 881: 1053, 882: 1054, 883: 1055, 884: 1056, 885: 1057, 886: 1058, 887: 1059, 888: 1060, 889: 1061, 890: 1062, 891: 1063, 892: 1065, 893: 1066, 894: 1067, 895: 1068, 896: 1069, 897: 1070, 898: 1071, 899: 1072, 900: 1073, 901: 1074, 902: 1075, 903: 1076, 904: 1077, 905: 1078, 906: 1079, 907: 1080, 908: 1081, 909: 1082, 910: 1084, 911: 1085, 912: 1086, 913: 1087, 914: 1088, 915: 1089, 916: 1090, 917: 1091, 918: 1092, 919: 1093, 920: 1094, 921: 1095, 922: 1096, 923: 1097, 924: 1098, 925: 1099, 926: 1100, 927: 1101, 928: 1103, 929: 1104, 930: 1105, 931: 1106, 932: 1107, 933: 1108, 934: 1110, 935: 1111, 936: 1112, 937: 1113, 938: 1114, 939: 1115, 940: 1117, 941: 1118, 942: 1119, 943: 1120, 944: 1121, 945: 1122} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 4: 3, 5: 4, 7: 5, 8: 6, 14: 7, 15: 8, 16: 9, 18: 10, 19: 11, 20: 12, 22: 13, 23: 14, 24: 15, 26: 16, 27: 17, 28: 18, 29: 19, 30: 20, 31: 21, 33: 22, 34: 23, 35: 24, 36: 25, 37: 26, 38: 27, 39: 28, 40: 29, 41: 30, 42: 31, 43: 32, 44: 33, 45: 34, 46: 35, 47: 36, 49: 37, 50: 38, 51: 39, 52: 40, 53: 41, 54: 42, 55: 43, 56: 44, 57: 45, 58: 46, 59: 47, 60: 48, 61: 49, 62: 50, 63: 51, 65: 52, 66: 53, 67: 54, 68: 55, 69: 56, 70: 57, 71: 58, 72: 59, 73: 60, 74: 61, 75: 62, 76: 63, 77: 64, 78: 65, 79: 66, 81: 67, 82: 68, 83: 69, 84: 70, 85: 71, 86: 72, 87: 73, 88: 74, 89: 75, 91: 76, 92: 77, 93: 78, 94: 79, 95: 80, 96: 81, 97: 82, 98: 83, 99: 84, 101: 85, 102: 86, 103: 87, 104: 88, 105: 89, 106: 90, 107: 91, 108: 92, 109: 93, 110: 94, 111: 95, 112: 96, 113: 97, 114: 98, 115: 99, 116: 100, 117: 101, 118: 102, 119: 103, 120: 104, 121: 105, 124: 106, 125: 107, 126: 108, 127: 109, 128: 110, 129: 111, 130: 112, 131: 113, 132: 114, 133: 115, 134: 116, 135: 117, 136: 118, 137: 119, 138: 120, 140: 121, 141: 122, 143: 123, 144: 124, 145: 125, 146: 126, 147: 127, 148: 128, 149: 129, 150: 130, 151: 131, 152: 132, 153: 133, 154: 134, 155: 135, 156: 136, 157: 137, 159: 138, 160: 139, 161: 140, 162: 141, 163: 142, 164: 143, 165: 144, 166: 145, 167: 146, 168: 147, 169: 148, 170: 149, 171: 150, 172: 151, 173: 152, 175: 153, 176: 154, 177: 155, 178: 156, 179: 157, 180: 158, 181: 159, 182: 160, 183: 161, 184: 162, 185: 163, 186: 164, 187: 165, 188: 166, 189: 167, 190: 168, 191: 169, 192: 170, 193: 171, 194: 172, 195: 173, 196: 174, 197: 175, 198: 176, 199: 177, 200: 178, 201: 179, 202: 180, 203: 181, 204: 182, 205: 183, 206: 184, 207: 185, 208: 186, 209: 187, 210: 188, 211: 189, 212: 190, 213: 191, 214: 192, 215: 193, 216: 194, 217: 195, 218: 196, 220: 197, 221: 198, 222: 199, 223: 200, 224: 201, 225: 202, 227: 203, 228: 204, 229: 205, 230: 206, 231: 207, 232: 208, 234: 209, 235: 210, 247: 211, 248: 212, 249: 213, 250: 214, 251: 215, 252: 216, 253: 217, 254: 218, 255: 219, 256: 220, 257: 221, 258: 222, 259: 223, 260: 224, 261: 225, 263: 226, 264: 227, 266: 228, 267: 229, 268: 230, 269: 231, 270: 232, 271: 233, 272: 234, 273: 235, 274: 236, 275: 237, 276: 238, 277: 239, 278: 240, 279: 241, 280: 242, 282: 243, 283: 244, 284: 245, 285: 
246, 286: 247, 287: 248, 288: 249, 289: 250, 290: 251, 291: 252, 292: 253, 293: 254, 294: 255, 295: 256, 296: 257, 298: 258, 299: 259, 300: 260, 301: 261, 302: 262, 303: 263, 304: 264, 305: 265, 306: 266, 307: 267, 308: 268, 309: 269, 310: 270, 311: 271, 312: 272, 313: 273, 314: 274, 315: 275, 316: 276, 317: 277, 318: 278, 319: 279, 320: 280, 321: 281, 322: 282, 323: 283, 324: 284, 325: 285, 326: 286, 327: 287, 328: 288, 329: 289, 330: 290, 331: 291, 332: 292, 333: 293, 334: 294, 335: 295, 336: 296, 337: 297, 338: 298, 339: 299, 340: 300, 341: 301, 343: 302, 344: 303, 345: 304, 346: 305, 347: 306, 348: 307, 350: 308, 351: 309, 352: 310, 353: 311, 354: 312, 355: 313, 357: 314, 358: 315, 370: 316, 371: 317, 372: 318, 373: 319, 374: 320, 375: 321, 377: 322, 378: 323, 379: 324, 380: 325, 381: 326, 382: 327, 383: 328, 384: 329, 385: 330, 386: 331, 387: 332, 388: 333, 389: 334, 390: 335, 391: 336, 393: 337, 394: 338, 395: 339, 396: 340, 397: 341, 398: 342, 399: 343, 400: 344, 401: 345, 402: 346, 403: 347, 404: 348, 405: 349, 406: 350, 407: 351, 409: 352, 410: 353, 411: 354, 412: 355, 413: 356, 414: 357, 415: 358, 416: 359, 417: 360, 418: 361, 419: 362, 420: 363, 421: 364, 422: 365, 423: 366, 425: 367, 426: 368, 427: 369, 428: 370, 429: 371, 430: 372, 431: 373, 432: 374, 433: 375, 434: 376, 435: 377, 437: 378, 438: 379, 440: 380, 441: 381, 447: 382, 448: 383, 449: 384, 450: 385, 451: 386, 452: 387, 453: 388, 454: 389, 455: 390, 457: 391, 458: 392, 459: 393, 460: 394, 461: 395, 462: 396, 463: 397, 464: 398, 465: 399, 467: 400, 468: 401, 469: 402, 470: 403, 471: 404, 472: 405, 473: 406, 474: 407, 475: 408, 477: 409, 478: 410, 479: 411, 480: 412, 481: 413, 482: 414, 484: 415, 485: 416, 486: 417, 487: 418, 488: 419, 489: 420, 493: 421, 494: 422, 495: 423, 496: 424, 497: 425, 498: 426, 500: 427, 501: 428, 502: 429, 503: 430, 504: 431, 505: 432, 506: 433, 507: 434, 508: 435, 509: 436, 510: 437, 511: 438, 512: 439, 513: 440, 514: 441, 516: 442, 517: 443, 518: 444, 519: 445, 520: 446, 521: 447, 522: 448, 523: 449, 524: 450, 525: 451, 526: 452, 527: 453, 528: 454, 529: 455, 530: 456, 532: 457, 533: 458, 534: 459, 535: 460, 536: 461, 537: 462, 538: 463, 539: 464, 540: 465, 541: 466, 542: 467, 543: 468, 544: 469, 545: 470, 546: 471, 548: 472, 549: 473, 550: 474, 551: 475, 552: 476, 553: 477, 554: 478, 555: 479, 556: 480, 557: 481, 558: 482, 560: 483, 561: 484, 563: 485, 564: 486, 570: 487, 571: 488, 572: 489, 573: 490, 574: 491, 575: 492, 576: 493, 577: 494, 578: 495, 580: 496, 581: 497, 582: 498, 583: 499, 584: 500, 585: 501, 586: 502, 587: 503, 588: 504, 590: 505, 591: 506, 592: 507, 593: 508, 594: 509, 595: 510, 596: 511, 597: 512, 598: 513, 600: 514, 601: 515, 602: 516, 603: 517, 604: 518, 605: 519, 607: 520, 608: 521, 609: 522, 610: 523, 611: 524, 612: 525, 616: 526, 617: 527, 618: 528, 619: 529, 620: 530, 621: 531, 623: 532, 624: 533, 625: 534, 626: 535, 627: 536, 628: 537, 629: 538, 630: 539, 631: 540, 632: 541, 633: 542, 634: 543, 635: 544, 636: 545, 637: 546, 639: 547, 640: 548, 641: 549, 642: 550, 643: 551, 644: 552, 645: 553, 646: 554, 647: 555, 648: 556, 649: 557, 650: 558, 651: 559, 652: 560, 653: 561, 655: 562, 656: 563, 657: 564, 658: 565, 659: 566, 660: 567, 661: 568, 662: 569, 663: 570, 664: 571, 665: 572, 666: 573, 667: 574, 668: 575, 669: 576, 671: 577, 672: 578, 673: 579, 674: 580, 675: 581, 676: 582, 677: 583, 678: 584, 679: 585, 680: 586, 681: 587, 683: 588, 684: 589, 686: 590, 687: 591, 693: 592, 694: 593, 695: 594, 696: 595, 697: 596, 698: 597, 699: 598, 700: 599, 701: 600, 703: 601, 
704: 602, 705: 603, 706: 604, 707: 605, 708: 606, 709: 607, 710: 608, 711: 609, 713: 610, 714: 611, 715: 612, 716: 613, 717: 614, 718: 615, 719: 616, 720: 617, 721: 618, 723: 619, 724: 620, 725: 621, 726: 622, 727: 623, 728: 624, 730: 625, 731: 626, 732: 627, 733: 628, 734: 629, 735: 630, 739: 631, 740: 632, 741: 633, 742: 634, 743: 635, 744: 636, 745: 637, 746: 638, 747: 639, 748: 640, 749: 641, 750: 642, 751: 643, 752: 644, 753: 645, 754: 646, 755: 647, 756: 648, 757: 649, 758: 650, 759: 651, 760: 652, 761: 653, 762: 654, 763: 655, 764: 656, 765: 657, 766: 658, 767: 659, 768: 660, 769: 661, 770: 662, 771: 663, 773: 664, 774: 665, 775: 666, 776: 667, 777: 668, 778: 669, 780: 670, 781: 671, 782: 672, 783: 673, 784: 674, 785: 675, 789: 676, 790: 677, 791: 678, 792: 679, 793: 680, 794: 681, 795: 682, 796: 683, 797: 684, 798: 685, 799: 686, 800: 687, 801: 688, 802: 689, 803: 690, 804: 691, 805: 692, 806: 693, 807: 694, 808: 695, 809: 696, 810: 697, 811: 698, 812: 699, 813: 700, 814: 701, 815: 702, 816: 703, 817: 704, 818: 705, 819: 706, 820: 707, 821: 708, 823: 709, 824: 710, 825: 711, 826: 712, 827: 713, 828: 714, 830: 715, 831: 716, 832: 717, 833: 718, 834: 719, 835: 720, 839: 721, 840: 722, 842: 723, 843: 724, 845: 725, 846: 726, 852: 727, 853: 728, 854: 729, 855: 730, 856: 731, 857: 732, 858: 733, 859: 734, 860: 735, 862: 736, 863: 737, 864: 738, 865: 739, 866: 740, 867: 741, 868: 742, 869: 743, 870: 744, 872: 745, 873: 746, 874: 747, 875: 748, 876: 749, 877: 750, 878: 751, 879: 752, 880: 753, 882: 754, 883: 755, 884: 756, 885: 757, 886: 758, 887: 759, 889: 760, 890: 761, 891: 762, 892: 763, 893: 764, 894: 765, 895: 766, 896: 767, 898: 768, 899: 769, 901: 770, 902: 771, 908: 772, 909: 773, 910: 774, 911: 775, 912: 776, 913: 777, 914: 778, 915: 779, 916: 780, 918: 781, 919: 782, 920: 783, 921: 784, 922: 785, 923: 786, 924: 787, 925: 788, 926: 789, 928: 790, 929: 791, 930: 792, 931: 793, 932: 794, 933: 795, 934: 796, 935: 797, 936: 798, 938: 799, 939: 800, 940: 801, 941: 802, 942: 803, 943: 804, 945: 805, 946: 806, 947: 807, 948: 808, 949: 809, 950: 810, 951: 811, 952: 812, 954: 813, 955: 814, 957: 815, 958: 816, 964: 817, 965: 818, 966: 819, 967: 820, 968: 821, 969: 822, 970: 823, 971: 824, 972: 825, 974: 826, 975: 827, 976: 828, 977: 829, 978: 830, 979: 831, 980: 832, 981: 833, 982: 834, 984: 835, 985: 836, 986: 837, 987: 838, 988: 839, 989: 840, 990: 841, 991: 842, 992: 843, 994: 844, 995: 845, 996: 846, 997: 847, 998: 848, 999: 849, 1001: 850, 1002: 851, 1003: 852, 1004: 853, 1005: 854, 1006: 855, 1007: 856, 1008: 857, 1010: 858, 1011: 859, 1013: 860, 1014: 861, 1019: 862, 1020: 863, 1022: 864, 1023: 865, 1025: 866, 1026: 867, 1031: 868, 1032: 869, 1034: 870, 1035: 871, 1037: 872, 1038: 873, 1046: 874, 1047: 875, 1048: 876, 1049: 877, 1050: 878, 1051: 879, 1052: 880, 1053: 881, 1054: 882, 1055: 883, 1056: 884, 1057: 885, 1058: 886, 1059: 887, 1060: 888, 1061: 889, 1062: 890, 1063: 891, 1065: 892, 1066: 893, 1067: 894, 1068: 895, 1069: 896, 1070: 897, 1071: 898, 1072: 899, 1073: 900, 1074: 901, 1075: 902, 1076: 903, 1077: 904, 1078: 905, 1079: 906, 1080: 907, 1081: 908, 1082: 909, 1084: 910, 1085: 911, 1086: 912, 1087: 913, 1088: 914, 1089: 915, 1090: 916, 1091: 917, 1092: 918, 1093: 919, 1094: 920, 1095: 921, 1096: 922, 1097: 923, 1098: 924, 1099: 925, 1100: 926, 1101: 927, 1103: 928, 1104: 929, 1105: 930, 1106: 931, 1107: 932, 1108: 933, 1110: 934, 1111: 935, 1112: 936, 1113: 937, 1114: 938, 1115: 939, 1117: 940, 1118: 941, 1119: 942, 1120: 943, 1121: 944, 1122: 945} [model_handling.py 
at line 1548]  -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.661 s -Wrote files for 2281 helas calls in 18.701 s +Generated helas calls for 1 subprocesses (1240 diagrams) in 6.738 s +Wrote files for 2281 helas calls in 18.801 s +DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.322 s +ALOHA: aloha creates 5 routines in 0.329 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.367 s +ALOHA: aloha creates 10 routines in 0.321 s VVV1 VVV1 FFV1 @@ -222,38 +213,40 @@ ALOHA: aloha creates 10 routines in 0.367 s VVVV3 VVVV4 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. 
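One more note on the misc.py changes earlier in this diff: `import_python_lhapdf` now probes `dist-packages` alongside `site-packages` (covering Debian/Ubuntu layouts) and additionally scans a sibling `local/lib` tree, using Python's `for ... else` to escape the nested search as soon as the module imports. (In the `lib64` branch the new `os.listdir` call still scans `local/lib`, which looks like a copy-paste slip worth flagging upstream.) A condensed sketch of the probe loop, assuming only that the bindings live under `<libdir>/<candidate>/{site,dist}-packages`:

```python
import os
import sys

def find_lhapdf_bindings(libdir):
    """Return True if the lhapdf python module can be imported from libdir."""
    try:
        candidates = sorted(os.listdir(libdir))
    except OSError:
        return False
    for candidate in candidates:
        for subdir in ('site-packages', 'dist-packages'):
            probe = os.path.join(libdir, candidate, subdir)
            if not os.path.isdir(probe):
                continue
            sys.path.insert(0, probe)
            try:
                import lhapdf  # noqa: F401
                break                # found: leave the subdir loop
            except ImportError:
                sys.path.pop(0)      # undo the path tweak and keep probing
        else:
            continue                 # no subdir worked: next candidate
        return True                  # the inner break lands here
    return False
```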
If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common -patching file Source/genps.inc +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file SubProcesses/makefile -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses/P1_gg_ttxggg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/SubProcesses/P1_gg_ttxggg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -Hunk #2 succeeded at 345 (offset 112 lines). +Hunk #2 succeeded at 339 (offset 125 lines). DEBUG: p.returncode =  0 [output.py at line 258]  -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/README +/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/README Run "open index.html" to see more information about this process. quit -real 0m33.094s -user 0m32.521s -sys 0m0.443s +real 0m33.303s +user 0m32.735s +sys 0m0.449s Code generation completed in 33 seconds ************************************************************ * * @@ -267,7 +260,7 @@ Code generation completed in 33 seconds * * * * * * * * * * * * -* VERSION 3.5.3_lo_vect * +* VERSION 3.6.0_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * @@ -275,9 +268,9 @@ Code generation completed in 33 seconds * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt @@ -297,7 +290,7 @@ launch in debug mode * * * * * * * * * * * * -* VERSION 3.5.3_lo_vect * +* VERSION 3.6.0_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * @@ -305,9 +298,9 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gg_ttggg/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt diff --git a/epochX/cudacpp/gg_ttggg.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gg_ttggg.mad/Cards/me5_configuration.txt index cdeedc7863..4f5079f78a 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gg_ttggg.mad/Cards/me5_configuration.txt @@ -116,6 +116,7 @@ # cluster_type = condor # cluster_queue = madgraph # cluster_size = 150 +# cluster_walltime = # time in minute for slurm and second for condor (not supported for other scheduller) #! Path to a node directory to avoid direct writing on the central disk #! 
Note that condor clusters avoid direct writing by default (therefore this @@ -234,7 +235,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gg_ttggg.mad/Cards/param_card.dat b/epochX/cudacpp/gg_ttggg.mad/Cards/param_card.dat index caf4a67ea8..bb150ef10d 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Cards/param_card.dat +++ b/epochX/cudacpp/gg_ttggg.mad/Cards/param_card.dat @@ -1,5 +1,5 @@ ###################################################################### -## PARAM_CARD AUTOMATICALY GENERATED BY MG5 FOLLOWING UFO MODEL #### +## PARAM_CARD AUTOMATICALLY GENERATED BY MG5 FOLLOWING UFO MODEL #### ###################################################################### ## ## ## Width set on Auto will be computed following the information ## diff --git a/epochX/cudacpp/gg_ttggg.mad/Cards/param_card_default.dat b/epochX/cudacpp/gg_ttggg.mad/Cards/param_card_default.dat index caf4a67ea8..bb150ef10d 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Cards/param_card_default.dat +++ b/epochX/cudacpp/gg_ttggg.mad/Cards/param_card_default.dat @@ -1,5 +1,5 @@ ###################################################################### -## PARAM_CARD AUTOMATICALY GENERATED BY MG5 FOLLOWING UFO MODEL #### +## PARAM_CARD AUTOMATICALLY GENERATED BY MG5 FOLLOWING UFO MODEL #### ###################################################################### ## ## ## Width set on Auto will be computed following the information ## diff --git a/epochX/cudacpp/gg_ttggg.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gg_ttggg.mad/Cards/proc_card_mg5.dat index c16335faca..c931dbb655 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gg_ttggg.mad/Cards/proc_card_mg5.dat @@ -8,7 +8,7 @@ #* * * * #* * #* * -#* VERSION 3.5.3_lo_vect 2023-12-23 * +#* VERSION 3.6.0_lo_vect 2024-06-17 * #* * #* WARNING: UNKNOWN DEVELOPMENT VERSION. * #* WARNING: DO NOT USE FOR PRODUCTION * diff --git a/epochX/cudacpp/gg_ttggg.mad/Cards/reweight_card_default.dat b/epochX/cudacpp/gg_ttggg.mad/Cards/reweight_card_default.dat index ace534ae02..5cc65fe095 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Cards/reweight_card_default.dat +++ b/epochX/cudacpp/gg_ttggg.mad/Cards/reweight_card_default.dat @@ -19,8 +19,16 @@ change mode NLO # Define type of Reweighting. For LO sample this command # has no effect since only "LO" mode is allowed. + +#uncomment if you do not want to overwrite the reweight file of Sudakov in rw_me +#change rwgt_dir /PATH_MG5_BRANCH/NAME_FOLDER + +#uncomment if you want to use Sudakov Reweight +#change include_sudakov True + launch + # SPECIFY A PATH OR USE THE SET COMMAND LIKE THIS: # set sminputs 1 130 # modify 1/alpha_EW diff --git a/epochX/cudacpp/gg_ttggg.mad/Cards/run_card.dat b/epochX/cudacpp/gg_ttggg.mad/Cards/run_card.dat index e1d6da3a51..a08f93d92b 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Cards/run_card.dat +++ b/epochX/cudacpp/gg_ttggg.mad/Cards/run_card.dat @@ -191,6 +191,7 @@ systematics = systematics_program ! none, systematics [python], SysCalc [depreceted, C++] ['--mur=0.5,1,2', '--muf=0.5,1,2', '--pdf=errorset'] = systematics_arguments ! 
see: https://cp3.irmp.ucl.ac.be/projects/madgraph/wiki/Systematics#Systematicspythonmodule + #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ diff --git a/epochX/cudacpp/gg_ttggg.mad/Cards/run_card_default.dat b/epochX/cudacpp/gg_ttggg.mad/Cards/run_card_default.dat index 7985e9ab10..e9046320c6 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Cards/run_card_default.dat +++ b/epochX/cudacpp/gg_ttggg.mad/Cards/run_card_default.dat @@ -191,6 +191,7 @@ systematics = systematics_program ! none, systematics [python], SysCalc [depreceted, C++] ['--mur=0.5,1,2', '--muf=0.5,1,2', '--pdf=errorset'] = systematics_arguments ! see: https://cp3.irmp.ucl.ac.be/projects/madgraph/wiki/Systematics#Systematicspythonmodule + #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ diff --git a/epochX/cudacpp/gg_ttggg.mad/MGMEVersion.txt b/epochX/cudacpp/gg_ttggg.mad/MGMEVersion.txt index 9d3a5c0ba0..a806d3938c 100644 --- a/epochX/cudacpp/gg_ttggg.mad/MGMEVersion.txt +++ b/epochX/cudacpp/gg_ttggg.mad/MGMEVersion.txt @@ -1 +1 @@ -3.5.3_lo_vect \ No newline at end of file +3.6.0_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/gg_ttggg.mad/Source/DHELAS/aloha_functions.f b/epochX/cudacpp/gg_ttggg.mad/Source/DHELAS/aloha_functions.f index 975725737f..47699fa614 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Source/DHELAS/aloha_functions.f +++ b/epochX/cudacpp/gg_ttggg.mad/Source/DHELAS/aloha_functions.f @@ -351,7 +351,7 @@ subroutine oxxxxx(p,fmass,nhel,nsf , fo) fo(6) = ip * sqm(abs(im)) else - pp = min(p(0),dsqrt(p(1)**2+p(2)**2+p(3)**2)) +c pp = min(p(0),dsqrt(p(1)**2+p(2)**2+p(3)**2)) sf(1) = dble(1+nsf+(1-nsf)*nh)*rHalf sf(2) = dble(1+nsf-(1-nsf)*nh)*rHalf omega(1) = dsqrt(p(0)+pp) @@ -2054,7 +2054,7 @@ subroutine CombineAmp(nb, ihels, iwfcts, W1, Wall, Amp) enddo return end - + subroutine CombineAmpS(nb, ihels, iwfcts, W1, Wall, Amp) integer nb ! size of the vectors diff --git a/epochX/cudacpp/gg_ttggg.mad/Source/MODEL/couplings.f b/epochX/cudacpp/gg_ttggg.mad/Source/MODEL/couplings.f index f3b620ab58..04d6bb5333 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Source/MODEL/couplings.f +++ b/epochX/cudacpp/gg_ttggg.mad/Source/MODEL/couplings.f @@ -10,18 +10,27 @@ SUBROUTINE COUP() PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + LOGICAL UPDATELOOP COMMON /TO_UPDATELOOP/UPDATELOOP INCLUDE 'input.inc' - INCLUDE '../vector.inc' + + INCLUDE 'coupl.inc' READLHA = .TRUE. 
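(Aside on the coupling reorganisation in couplings.f and couplings1-3.f around this hunk: constant couplings remain in COUP1, the old per-event COUP2(VECID) body moves to COUP3(VECID), and the new argument-less COUP2 is only invoked when UPDATELOOP is set, apparently for couplings tied to loop-model parameters; only the alphaS-dependent set is then re-evaluated point by point for each slot of the event vector. The Python sketch below illustrates that caching split; the helper names are hypothetical and this is an illustration, not the generated Fortran.)

    import math

    def coup1():
        """Constant couplings: evaluated once at initialisation (cf. COUP1)."""
        return {"GC_3": -0.313451}  # hypothetical fixed electroweak coupling

    def coup3(alpha_s):
        """Running couplings: re-evaluated point by point (cf. COUP3(VECID))."""
        g = math.sqrt(4.0 * math.pi * alpha_s)
        return {"GC_10": -g,          # mirrors GC_10(VECID) = -G
                "GC_11": 1j * g,      # mirrors GC_11(VECID) = MDL_COMPLEXI*G
                "GC_12": 1j * g * g}  # mirrors GC_12(VECID) = MDL_COMPLEXI*MDL_G__EXP__2

    constants = coup1()                           # filled once, never touched again
    per_event = [coup3(0.118) for _ in range(4)]  # one entry per slot of the event vector
    print({**constants, **per_event[0]})
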
INCLUDE 'intparam_definition.inc' CALL COUP1() + IF (UPDATELOOP) THEN + + CALL COUP2() + + ENDIF + C couplings needed to be evaluated points by points C - CALL COUP2(1) + CALL COUP3(1) RETURN END @@ -47,7 +56,11 @@ SUBROUTINE UPDATE_AS_PARAM(VECID) INCLUDE '../maxparticles.inc' INCLUDE '../cuts.inc' + + INCLUDE '../vector.inc' + + INCLUDE '../run.inc' DOUBLE PRECISION ALPHAS @@ -65,7 +78,7 @@ SUBROUTINE UPDATE_AS_PARAM(VECID) couplings needed to be evaluated points by points C ALL_G(VECID) = G - CALL COUP2(VECID) + CALL COUP3(VECID) RETURN END @@ -80,7 +93,9 @@ SUBROUTINE UPDATE_AS_PARAM2(MU_R2,AS2 ,VECID) INTEGER VECID INCLUDE 'model_functions.inc' INCLUDE 'input.inc' + INCLUDE '../vector.inc' + INCLUDE 'coupl.inc' DOUBLE PRECISION MODEL_SCALE COMMON /MODEL_SCALE/MODEL_SCALE diff --git a/epochX/cudacpp/gg_ttggg.mad/Source/MODEL/couplings1.f b/epochX/cudacpp/gg_ttggg.mad/Source/MODEL/couplings1.f index e14f3a1770..72cfa0f6e4 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Source/MODEL/couplings1.f +++ b/epochX/cudacpp/gg_ttggg.mad/Source/MODEL/couplings1.f @@ -7,11 +7,12 @@ SUBROUTINE COUP1( ) IMPLICIT NONE INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' END diff --git a/epochX/cudacpp/gg_ttggg.mad/Source/MODEL/couplings2.f b/epochX/cudacpp/gg_ttggg.mad/Source/MODEL/couplings2.f index e638b28035..30f3a04e3b 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Source/MODEL/couplings2.f +++ b/epochX/cudacpp/gg_ttggg.mad/Source/MODEL/couplings2.f @@ -2,19 +2,17 @@ c written by the UFO converter ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc - SUBROUTINE COUP2( VECID) + SUBROUTINE COUP2( ) IMPLICIT NONE - INTEGER VECID + INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' - GC_10(VECID) = -G - GC_11(VECID) = MDL_COMPLEXI*G - GC_12(VECID) = MDL_COMPLEXI*MDL_G__EXP__2 END diff --git a/epochX/cudacpp/gg_ttggg.mad/Source/MODEL/couplings3.f b/epochX/cudacpp/gg_ttggg.mad/Source/MODEL/couplings3.f index f537dd3764..ad696f2865 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Source/MODEL/couplings3.f +++ b/epochX/cudacpp/gg_ttggg.mad/Source/MODEL/couplings3.f @@ -7,12 +7,13 @@ SUBROUTINE COUP3( VECID) IMPLICIT NONE INTEGER VECID INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' GC_10(VECID) = -G GC_11(VECID) = MDL_COMPLEXI*G diff --git a/epochX/cudacpp/gg_ttggg.mad/Source/MODEL/makefile b/epochX/cudacpp/gg_ttggg.mad/Source/MODEL/makefile index 0b24445a53..5a5681d220 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Source/MODEL/makefile +++ b/epochX/cudacpp/gg_ttggg.mad/Source/MODEL/makefile @@ -3,7 +3,7 @@ # Makefile for model library # # ---------------------------------------------------------------------------- - +# template models/template_files/makefile_madevent # Check for ../make_opts ifeq ($(wildcard ../make_opts), ../make_opts) include ../make_opts diff --git a/epochX/cudacpp/gg_ttggg.mad/Source/MODEL/makeinc.inc b/epochX/cudacpp/gg_ttggg.mad/Source/MODEL/makeinc.inc index 6e2743eac1..4a9e1b2cc5 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Source/MODEL/makeinc.inc +++ b/epochX/cudacpp/gg_ttggg.mad/Source/MODEL/makeinc.inc @@ -2,4 +2,4 @@ # 
written by the UFO converter ############################################################################# -MODEL = couplings.o lha_read.o printout.o rw_para.o model_functions.o couplings1.o couplings2.o \ No newline at end of file +MODEL = couplings.o lha_read.o printout.o rw_para.o model_functions.o couplings1.o couplings2.o couplings3.o \ No newline at end of file diff --git a/epochX/cudacpp/gg_ttggg.mad/Source/MODEL/printout.f b/epochX/cudacpp/gg_ttggg.mad/Source/MODEL/printout.f index 18b8f35b08..3e195b03a2 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Source/MODEL/printout.f +++ b/epochX/cudacpp/gg_ttggg.mad/Source/MODEL/printout.f @@ -1,3 +1,7 @@ +ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc +c written by the UFO converter +ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc + c************************************************************************ c** ** c** MadGraph/MadEvent Interface to FeynRules ** @@ -9,7 +13,7 @@ subroutine printout implicit none - include '../vector.inc' ! defines VECSIZE_MEMMAX + include '../vector.inc' ! VECSIZE_MEMMAX (needed by coupl.inc) include 'coupl.inc' ! needs VECSIZE_MEMMAX (defined in vector.inc) include 'input.inc' diff --git a/epochX/cudacpp/gg_ttggg.mad/Source/PDF/eepdf.inc b/epochX/cudacpp/gg_ttggg.mad/Source/PDF/eepdf.inc index a0183e49ee..d50d8c62b3 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Source/PDF/eepdf.inc +++ b/epochX/cudacpp/gg_ttggg.mad/Source/PDF/eepdf.inc @@ -4,6 +4,9 @@ integer n_ee parameter (n_ee = 4) ! arrays to store the components before combining them + ! note this common is very quickly overwritten do not use + ! use such common outside of auto_dsig.f double precision ee_components(n_ee) common / to_ee_components / ee_components + diff --git a/epochX/cudacpp/gg_ttggg.mad/Source/PDF/pdfwrap_emela.f b/epochX/cudacpp/gg_ttggg.mad/Source/PDF/pdfwrap_emela.f index bce10819d5..da1e4e2276 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Source/PDF/pdfwrap_emela.f +++ b/epochX/cudacpp/gg_ttggg.mad/Source/PDF/pdfwrap_emela.f @@ -13,7 +13,7 @@ SUBROUTINE PDFWRAP DOUBLE PRECISION VALUE(20) REAL*8 ALPHASPDF EXTERNAL ALPHASPDF - ! PDFs with beamstrahlung use specific initialisation/evaluation +C PDFs with beamstrahlung use specific initialisation/evaluation LOGICAL HAS_BSTRAHL COMMON /TO_HAS_BS/ HAS_BSTRAHL diff --git a/epochX/cudacpp/gg_ttggg.mad/Source/PDF/pdg2pdf.f b/epochX/cudacpp/gg_ttggg.mad/Source/PDF/pdg2pdf.f index 46f321e66b..2e7343a69b 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Source/PDF/pdg2pdf.f +++ b/epochX/cudacpp/gg_ttggg.mad/Source/PDF/pdg2pdf.f @@ -108,7 +108,7 @@ double precision function pdg2pdf(ih,ipdg,beamid,x,xmu) else if (abs(ipart).eq.10) then ipart = sign(1,ipart) * 15 endif - pdg2pdf = 0d0 + pdg2pdf = 0d0 if (beamid.lt.0) then ih_local = ipart @@ -122,7 +122,7 @@ double precision function pdg2pdf(ih,ipdg,beamid,x,xmu) endif do i_ee = 1, n_ee ee_components(i_ee) = compute_eepdf(x,omx_ee(iabs(beamid)),xmu,i_ee,ipart,ih_local) - enddo + enddo pdg2pdf = ee_components(1) ! 
temporary to test pdf load c write(*,*), x, beamid ,omx_ee(iabs(beamid)),xmu,1,ipart,ih_local,pdg2pdf return diff --git a/epochX/cudacpp/gg_ttggg.mad/Source/dsample.f b/epochX/cudacpp/gg_ttggg.mad/Source/dsample.f index 09d482b45a..b0bc0547ad 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Source/dsample.f +++ b/epochX/cudacpp/gg_ttggg.mad/Source/dsample.f @@ -204,6 +204,9 @@ subroutine sample_full(ndim,ncall,itmax,itmin,dsig,ninvar,nconfigs,VECSIZE_USED) if (VECSIZE_USED.le.1) then all_fx(1) = dsig(all_p, all_wgt,0) + ivec=0 + ilock=0 + iwarp=1 else c Here "i" is the position in the full grid of the event do i=(iwarp-1)*WARP_SIZE+1, iwarp*warp_size diff --git a/epochX/cudacpp/gg_ttggg.mad/Source/eepdf.inc b/epochX/cudacpp/gg_ttggg.mad/Source/eepdf.inc index a0183e49ee..d50d8c62b3 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Source/eepdf.inc +++ b/epochX/cudacpp/gg_ttggg.mad/Source/eepdf.inc @@ -4,6 +4,9 @@ integer n_ee parameter (n_ee = 4) ! arrays to store the components before combining them + ! note this common is very quickly overwritten do not use + ! use such common outside of auto_dsig.f double precision ee_components(n_ee) common / to_ee_components / ee_components + diff --git a/epochX/cudacpp/gg_ttggg.mad/Source/genps.inc b/epochX/cudacpp/gg_ttggg.mad/Source/genps.inc index af7e0efbce..ef78e7e812 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Source/genps.inc +++ b/epochX/cudacpp/gg_ttggg.mad/Source/genps.inc @@ -29,9 +29,8 @@ c parameter (max_host=99,maxplace=199,maxpoints=100,maxans=50) c************************************************************************* c Parameters for helicity sums in matrixN.f c************************************************************************* - REAL*8 LIMHEL -c PARAMETER(LIMHEL=1e-8) ! ME threshold for helicity filtering (Fortran default) - PARAMETER(LIMHEL=0) ! ME threshold for helicity filtering (force Fortran to mimic cudacpp, see #419) +c REAL*8 LIMHEL +c PARAMETER(LIMHEL=1e-8) -> pass in the run_card.dat INTEGER MAXTRIES PARAMETER(MAXTRIES=25) C To pass the helicity configuration chosen by the DiscreteSampler to diff --git a/epochX/cudacpp/gg_ttggg.mad/Source/run.inc b/epochX/cudacpp/gg_ttggg.mad/Source/run.inc index 5433a23583..81c8df6bf2 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Source/run.inc +++ b/epochX/cudacpp/gg_ttggg.mad/Source/run.inc @@ -106,4 +106,7 @@ c double precision tmin_for_channel integer sde_strat ! 
1 means standard single diagram enhancement strategy, c 2 means approximation by the denominator of the propagator - common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat \ No newline at end of file + common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat +c + double precision limhel + common/to_limhel/limhel diff --git a/epochX/cudacpp/gg_ttggg.mad/Source/run_card.inc b/epochX/cudacpp/gg_ttggg.mad/Source/run_card.inc index 67af0f2051..1a1bc782bd 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Source/run_card.inc +++ b/epochX/cudacpp/gg_ttggg.mad/Source/run_card.inc @@ -88,6 +88,8 @@ DSQRT_SHAT = 0.000000000000000D+00 + LIMHEL = 0.000000000000000D+00 + PTJ = 2.000000000000000D+01 PTB = 0.000000000000000D+00 diff --git a/epochX/cudacpp/gg_ttggg.mad/Source/setrun.f b/epochX/cudacpp/gg_ttggg.mad/Source/setrun.f index 9e9ef7fdbd..dc6caef748 100644 --- a/epochX/cudacpp/gg_ttggg.mad/Source/setrun.f +++ b/epochX/cudacpp/gg_ttggg.mad/Source/setrun.f @@ -162,9 +162,21 @@ subroutine setrun C Fill common block for Les Houches init info do i=1,2 if(lpp(i).eq.1.or.lpp(i).eq.2) then - idbmup(i)=2212 + if (nb_proton(i).eq.1.and.nb_neutron(i).eq.0) then + idbmup(i)=2212 + elseif (nb_proton(i).eq.0.and.nb_neutron(i).eq.1) then + idbmup(i)=2112 + else + idbmup(i) = 1000000000 + (nb_proton(i)+nb_neutron(i))*10 + $ + nb_proton(i)*10000 + endif elseif(lpp(i).eq.-1.or.lpp(i).eq.-2) then - idbmup(i)=-2212 + if (nb_proton(i).eq.1.and.nb_neutron(i).eq.0) then + idbmup(i)=-2212 + else + idbmup(i) = -1*(1000000000 + (nb_proton(i)+nb_neutron(i))*10 + $ + nb_proton(i)*10000) + endif elseif(lpp(i).eq.3) then idbmup(i)=11 elseif(lpp(i).eq.-3) then diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MGVersion.txt b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MGVersion.txt index 9d3a5c0ba0..a806d3938c 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MGVersion.txt +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/MGVersion.txt @@ -1 +1 @@ -3.5.3_lo_vect \ No newline at end of file +3.6.0_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc index 34ac05600b..887e7fd63e 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.h b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.h index 76b0c3a844..5cceaea083 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.h +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. 
//========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig.f index 6f04bdc5d5..95adb1231d 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1134,11 +1134,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=128) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1148,32 +1149,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=128) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=128) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f index 443ffbfd75..a1e98bce0d 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -101,6 +101,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -217,7 +219,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -281,7 +283,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -320,9 +322,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -338,6 +341,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -485,11 +490,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -589,9 +589,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -816,3 +818,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/driver.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/driver.f index f6ed180095..3671cdce55 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/driver.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
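(Aside on the driver.f hunks above: the new init_mode flag, shared via the to_determine_zero_hel common block, makes the helicity-zeroing pass run with frozen renormalisation and factorisation scales and with ickkw forced to 0, presumably so that the classification of zero helicities is not affected by event-dependent dynamic scales. A rough Python illustration of that intent follows; the function name and the settings dictionary are hypothetical stand-ins for the Fortran common blocks.)

    def enter_helicity_init_mode(settings):
        """Freeze scale choices for the zero-helicity scan (mirrors the
        fixed_ren_scale/fixed_fac_scale1/fixed_fac_scale2/ickkw assignments)."""
        settings.update(init_mode=True,
                        fixed_ren_scale=True,
                        fixed_fac_scale1=True,
                        fixed_fac_scale2=True,
                        ickkw=0)
        return settings

    run = {"init_mode": False, "fixed_ren_scale": False, "fixed_fac_scale1": False,
           "fixed_fac_scale2": False, "ickkw": 1}
    print(enter_helicity_init_mode(run))
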
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f index 22f41b2861..47e0261337 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -23,6 +23,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=128) INTEGER NGRAPHS @@ -46,8 +48,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -56,26 +58,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -86,7 +85,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -96,26 +94,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,7) /-1,-1,-1, 1,-1,-1,-1/ DATA (NHEL(I, 2),I=1,7) /-1,-1,-1, 1,-1,-1, 1/ DATA (NHEL(I, 3),I=1,7) /-1,-1,-1, 1,-1, 1,-1/ @@ -255,8 +252,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -265,11 +261,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=120 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=120 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -279,12 +275,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) @@ -296,7 +291,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -325,35 +321,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
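(Aside on the matrix1.f rewrite in this hunk: the IMIRROR-indexed bookkeeping arrays become MAXSPROC-indexed, and a helicity is recorded as good when its matrix element passes the LIMHEL cut, which per the genps.inc/run.inc/run_card changes earlier in this diff is now read from the run_card instead of being hard-coded. A short sketch of the selection criterion, assuming ts holds the per-helicity values TS(I); this mirrors the DABS(TS(I)).GT.ANS*LIMHEL/NCOMB test but is not the generated code itself.)

    def good_helicities(ts, limhel):
        """Return the 1-based helicity indices kept by the LIMHEL threshold."""
        ans, ncomb = sum(ts), len(ts)
        return [i for i, t in enumerate(ts, start=1)
                if abs(t) > ans * limhel / ncomb]

    ts = [0.0, 1.2e-3, 4.5e-12, 0.8e-3]
    print(good_helicities(ts, limhel=1e-8))  # Fortran default: drops tiny channels
    print(good_helicities(ts, limhel=0.0))   # threshold 0 keeps any non-zero helicity
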
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -425,7 +419,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cluster.f b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cluster.f index 649e46f4e9..b8995283ed 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cluster.f +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/cluster.f @@ -552,6 +552,8 @@ logical function cluster(p, ivec) if (btest(mlevel,1)) $ write (*,*)'New event' + iwin = 0 + jwin = 0 cluster=.false. clustered=.false. do i=0,3 @@ -663,7 +665,8 @@ logical function cluster(p, ivec) c initialize graph storage igraphs(0)=0 nleft=nexternal -c cluster +c cluster + if (iwin.eq.0.or.jwin.eq.0) stop 21 do n=1,nexternal-2 c combine winner imocl(n)=imap(iwin,2)+imap(jwin,2) diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/proc_characteristics b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/proc_characteristics index e7f6392d16..2a6bb58e2b 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/proc_characteristics +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/proc_characteristics @@ -17,6 +17,8 @@ splitting_types = [] perturbation_order = [] limitations = [] + ew_sudakov = False hel_recycling = False single_color = True nlo_mixed_expansion = True + gauge = unitary diff --git a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/refine.sh b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/refine.sh index afb9b99ad1..b46170ba23 100644 --- a/epochX/cudacpp/gg_ttggg.mad/SubProcesses/refine.sh +++ b/epochX/cudacpp/gg_ttggg.mad/SubProcesses/refine.sh @@ -57,7 +57,11 @@ j=%(directory)s for((try=1;try<=16;try+=1)); do if [ "$keeplog" = true ] ; then + if [[ -e ../madevent ]];then ../madevent 2>&1 >> $k <input_app.txt + else + ../madevent_fortran 2>&1 >> $k <input_app.txt + fi else + if [[ -e ../madevent ]];then ../madevent 2>&1 >> log.txt <input_app.txt + else + ../madevent_fortran 2>&1 >> log.txt <input_app.txt + fi fi diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/banner.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/banner.py --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/banner.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/banner.py - pat_begin=re.compile('<(?P<name>\w*)>') - pat_end=re.compile('</(?P<name>\w*)>') + pat_begin=re.compile(r'<(?P<name>\w*)>') + pat_end=re.compile(r'</(?P<name>\w*)>') tag_to_file={'slha':'param_card.dat', 'mgruncard':'run_card.dat', @@ -319,7 +319,7 @@ def check_pid(self, pid2label): def get_lha_strategy(self): """get the lha_strategy: how the weight have to be handle by the shower""" - if not self["init"]: + if "init" not in self or not self["init"]: raise Exception("No init block define") data = self["init"].split('\n')[0].split() @@ -537,7 +537,8 @@ def charge_card(self, tag): self.param_card = param_card_reader.ParamCard(param_card) return self.param_card elif tag == 'mgruncard': - self.run_card = RunCard(self[tag], unknown_warning=False) + with misc.TMP_variable(RunCard, 'allow_scan', True): + self.run_card = RunCard(self[tag], consistency=False, unknow_warning=False) return
self.run_card elif tag == 'mg5proccard': proc_card = self[tag].split('\n') @@ -976,6 +977,8 @@ class ConfigFile(dict): """ a class for storing/dealing with input file. """ + allow_scan = False + def __init__(self, finput=None, **opt): """initialize a new instance. input can be an instance of MadLoopParam, a file, a path to a file, or simply Nothing""" @@ -993,6 +996,7 @@ def __init__(self, finput=None, **opt): # Initialize it with all the default value self.user_set = set() self.auto_set = set() + self.scan_set = {} #key -> type of list (int/float/bool/str/... for scan self.system_only = set() self.lower_to_case = {} self.list_parameter = {} #key -> type of list (int/float/bool/str/... @@ -1109,6 +1113,8 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): #1. check if the parameter is set to auto -> pass it to special if lower_name in self: targettype = type(dict.__getitem__(self, lower_name)) + if lower_name in self.scan_set: + targettype = self.scan_set[lower_name] if targettype != str and isinstance(value, str) and value.lower() == 'auto': self.auto_set.add(lower_name) if lower_name in self.user_set: @@ -1118,13 +1124,26 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): return elif lower_name in self.auto_set: self.auto_set.remove(lower_name) - + + + #1. check if the parameter is set to auto -> pass it to special + scan_targettype = None + if self.allow_scan and isinstance(value, str) and value.strip().startswith('scan'): + if lower_name in self.user_set: + self.user_set.remove(lower_name) + self.scan_set[lower_name] = type(self[lower_name]) + dict.__setitem__(self, lower_name, value) + #keep old value. + self.post_set(lower_name,value, change_userdefine, raiseerror) + return + elif lower_name in self.scan_set: + scan_targettype = self.scan_set[lower_name] + del self.scan_set[lower_name] + # 2. Find the type of the attribute that we want if lower_name in self.list_parameter: targettype = self.list_parameter[lower_name] - - if isinstance(value, str): # split for each comma/space value = value.strip() @@ -1248,8 +1267,11 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): if change_userdefine: self.user_set.add(lower_name) return self.post_set(lower_name, None, change_userdefine, raiseerror) - elif name in self: - targettype = type(self[name]) + elif name in self: + if scan_targettype: + targettype = targettype + else: + targettype = type(self[name]) else: logger.debug('Trying to add argument %s in %s. ' % (name, self.__class__.__name__) +\ 'This argument is not defined by default. 
Please consider adding it.') @@ -1262,7 +1284,7 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): if change_userdefine: self.user_set.add(lower_name) return self.post_set(lower_name, None, change_userdefine, raiseerror) - + value = self.format_variable(value, targettype, name=name) #check that the value is allowed: if lower_name in self.allowed_value and '*' not in self.allowed_value[lower_name]: @@ -1444,7 +1466,7 @@ def format_variable(value, targettype, name="unknown"): value =int(value[:-1]) * convert[value[-1]] elif '/' in value or '*' in value: try: - split = re.split('(\*|/)',value) + split = re.split(r'(\*|/)',value) v = float(split[0]) for i in range((len(split)//2)): if split[2*i+1] == '*': @@ -1482,7 +1504,7 @@ def format_variable(value, targettype, name="unknown"): value = float(value) except ValueError: try: - split = re.split('(\*|/)',value) + split = re.split(r'(\*|/)',value) v = float(split[0]) for i in range((len(split)//2)): if split[2*i+1] == '*': @@ -1491,7 +1513,10 @@ def format_variable(value, targettype, name="unknown"): v /= float(split[2*i+2]) except: v=0 - raise InvalidCmd("%s can not be mapped to a float" % value) + if "scan" in value: + raise InvalidCmd("%s is not supported here. Note that scan command can not be present simultaneously in the run_card and param_card." % value) + else: + raise InvalidCmd("%s can not be mapped to a float" % value) finally: value = v else: @@ -1737,10 +1762,12 @@ def default_setup(self): self.add_param('splitting_types',[], typelist=str) self.add_param('perturbation_order', [], typelist=str) self.add_param('limitations', [], typelist=str) + self.add_param('ew_sudakov', False) self.add_param('hel_recycling', False) self.add_param('single_color', True) self.add_param('nlo_mixed_expansion', True) - + self.add_param('gauge', 'U') + def read(self, finput): """Read the input file, this can be a path to a file, a file object, a str with the content of the file.""" @@ -3104,7 +3131,7 @@ def write(self, output_file, template=None, python_template=False, # do not write hidden parameter not hidden for this template # if python_template: - written = written.union(set(re.findall('\%\((\w*)\)s', open(template,'r').read(), re.M))) + written = written.union(set(re.findall(r'\%\((\w*)\)s', open(template,'r').read(), re.M))) to_write = to_write.union(set(self.hidden_param)) to_write = to_write.difference(written) @@ -3157,7 +3184,6 @@ def get_value_from_include(self, path, list_of_params, output_dir): if path does not exists return the current value in self for all parameter""" #WARNING DOES NOT HANDLE LIST/DICT so far - misc.sprint(output_dir, path) # handle case where file is missing if not os.path.exists(pjoin(output_dir,path)): misc.sprint("include file not existing", pjoin(output_dir,path)) @@ -3259,7 +3285,7 @@ def edit_dummy_fct_from_file(self, filelist, outdir): text = open(path,'r').read() #misc.sprint(text) f77_type = ['real*8', 'integer', 'double precision', 'logical'] - pattern = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ + pattern = re.compile(r'^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ % {'type':'|'.join(f77_type)}, re.I+re.M) for fct in pattern.findall(text): fsock = file_writers.FortranWriter(tmp,'w') @@ -3287,6 +3313,7 @@ def edit_dummy_fct_from_file(self, filelist, outdir): starttext = open(pjoin(outdir, path+'.orig')).read() fsock.remove_routine(starttext, to_mod[path][0]) for text in to_mod[path][1]: + text = self.retro_compatible_custom_fct(text) 
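(Aside on the ConfigFile changes above: with allow_scan enabled, any string value starting with 'scan' skips type conversion; the parameter's original target type is remembered in scan_set so that conversion can be re-applied when concrete scan points are assigned later. The snippet below is a simplified stand-in for that idea, not the actual ConfigFile API.)

    class ScanAwareConfig(dict):
        """Minimal sketch: store 'scan...' strings raw, convert everything else."""
        allow_scan = False

        def __init__(self, defaults):
            super().__init__(defaults)
            self.scan_set = {}  # parameter name -> original target type

        def __setitem__(self, name, value):
            if (self.allow_scan and isinstance(value, str)
                    and value.strip().startswith('scan')):
                self.scan_set[name] = type(self[name])  # remember int/float/...
                super().__setitem__(name, value)        # keep the raw scan string
                return
            target = self.scan_set.pop(name, type(self[name]))
            super().__setitem__(name, target(value))

    cfg = ScanAwareConfig({'nevents': 1000})
    cfg.allow_scan = True
    cfg['nevents'] = 'scan:[1000, 5000, 10000]'
    print(cfg.scan_set)  # {'nevents': <class 'int'>}
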
fsock.writelines(text) fsock.close() if not filecmp.cmp(pjoin(outdir, path), pjoin(outdir, path+'.tmp')): @@ -3303,7 +3330,33 @@ def edit_dummy_fct_from_file(self, filelist, outdir): files.mv(pjoin(outdir,path+'.orig'), pjoin(outdir, path)) + @staticmethod + def retro_compatible_custom_fct(lines, mode=None): + f77_type = ['real*8', 'integer', 'double precision', 'logical'] + function_pat = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ + % {'type':'|'.join(f77_type)}, re.I+re.M) + include_pat = re.compile(r"\s+include\s+[\'\"]([\w\./]*)") + + assert isinstance(lines, list) + sol = [] + + if mode is None or 'vector.inc' in mode: + search = True + for i,line in enumerate(lines[:]): + if search and re.search(include_pat, line): + name = re.findall(include_pat, line)[0] + misc.sprint('DETECTED INCLUDE', name) + if 'vector.inc' in name: + search = False + if 'run.inc' in name: + sol.append(" include 'vector.inc'") + search = False + sol.append(line) + if re.search(function_pat, line): + misc.sprint("DETECTED FCT") + search = True + return sol def guess_entry_fromname(self, name, value): """ @@ -3346,7 +3399,7 @@ def update_typelist(value, name, opts): #handle metadata opts = {} forced_opts = [] - for key,val in re.findall("\<(?P<name>[_\-\w]+)\=(?P<value>[^>]*)\>", str(name)): + for key,val in re.findall(r"\<(?P<name>[_\-\w]+)\=(?P<value>[^>]*)\>", str(name)): forced_opts.append(key) if val in ['True', 'False']: opts[key] = eval(val) @@ -3681,11 +3734,22 @@ def write_autodef(self, output_dir, output_file=None): out = ["%s\n" %l for l in out] fsock.writelines(out) - @staticmethod - def get_idbmup(lpp): + def get_idbmup(self, lpp, beam=1): """return the particle colliding pdg code""" if lpp in (1,2, -1,-2): - return math.copysign(2212, lpp) + target = 2212 + if 'nb_proton1' in self: + nbp = self['nb_proton%s' % beam] + nbn = self['nb_neutron%s' % beam] + if nbp == 1 and nbn ==0: + target = 2212 + elif nbp==0 and nbn ==1: + target = 2112 + else: + target = 1000000000 + target += 10 * (nbp+nbn) + target += 10000 * nbp + return math.copysign(target, lpp) elif lpp in (3,-3): return math.copysign(11, lpp) elif lpp in (4,-4): @@ -3701,8 +3765,8 @@ def get_banner_init_information(self): the first line of the <init> block of the lhe file.""" output = {} - output["idbmup1"] = self.get_idbmup(self['lpp1']) - output["idbmup2"] = self.get_idbmup(self['lpp2']) + output["idbmup1"] = self.get_idbmup(self['lpp1'], beam=1) + output["idbmup2"] = self.get_idbmup(self['lpp2'], beam=2) output["ebmup1"] = self["ebeam1"] output["ebmup2"] = self["ebeam2"] output["pdfgup1"] = 0 @@ -3959,7 +4023,8 @@ def check_validity(self, card): dict.__setitem__(card, 'pdlabel1', card['pdlabel']) dict.__setitem__(card, 'pdlabel2', card['pdlabel']) - if abs(card['lpp1']) == 1 == abs(card['lpp2']) and card['pdlabel1'] != card['pdlabel2']: + if isinstance(card['lpp1'],int) and isinstance(card['lpp2'],int) and \ + abs(card['lpp1']) == 1 == abs(card['lpp2']) and card['pdlabel1'] != card['pdlabel2']: raise InvalidRunCard("Assymetric beam pdf not supported for proton-proton collision") def status(self, card): @@ -4156,12 +4221,16 @@ def default_setup(self): self.add_param('frame_id', 6, system=True) self.add_param("event_norm", "average", allowed=['sum','average', 'unity'], include=False, sys_default='sum', hidden=True) + self.add_param("keep_log", "normal", include=False, hidden=True, + comment="none: all log send to /dev/null.\n minimal: keep only log for survey of the last run.\n normal: keep only log for survey of all run.
\n debug: keep all log (survey and refine)", + allowed=['none', 'minimal', 'normal', 'debug']) #cut self.add_param("auto_ptj_mjj", True, hidden=True) self.add_param("bwcutoff", 15.0) self.add_param("cut_decays", False, cut='d') self.add_param('dsqrt_shat',0., cut=True) self.add_param("nhel", 0, include=False) + self.add_param("limhel", 1e-8, hidden=True, comment="threshold to determine if an helicity contributes when not MC over helicity.") #pt cut self.add_param("ptj", 20.0, cut='j') self.add_param("ptb", 0.0, cut='b') @@ -4842,7 +4911,7 @@ def create_default_for_process(self, proc_characteristic, history, proc_def): # here pick strategy 2 if only one QCD color flow # and for pure multi-jet case jet_id = [21] + list(range(1, self['maxjetflavor']+1)) - if proc_characteristic['single_color']: + if proc_characteristic['gauge'] != 'FD' and proc_characteristic['single_color']: self['sde_strategy'] = 2 #for pure lepton final state go back to sde_strategy=1 pure_lepton=True @@ -5741,9 +5810,10 @@ def check_validity(self): # check that ebeam is bigger than the proton mass. for i in [1,2]: - if self['lpp%s' % i ] not in [1,2]: + # do not for proton mass if not proton PDF (or when scan initialization) + if self['lpp%s' % i ] not in [1,2] or isinstance(self['ebeam%i' % i], str): continue - + if self['ebeam%i' % i] < 0.938: if self['ebeam%i' %i] == 0: logger.warning("At-rest proton mode set: energy beam set to 0.938 GeV") @@ -6193,3 +6263,181 @@ def log_and_update(self, banner, card, par, v): xcard = banner.charge_card(card) xcard[par[0]].param_dict[(par[1],)].value = v xcard.write(os.path.join(self.me_dir, 'Cards', '%s.dat' % card)) + + + + +class RunCardIterator(object): + """A class keeping track of the scan: flag in the param_card and + having an __iter__() function to scan over all the points of the scan. + """ + + logging = True + def __init__(self, input_path=None): + with misc.TMP_variable(RunCard, 'allow_scan', True): + self.run_card = RunCard(input_path, consistency=False) + self.run_card.allow_scan = True + + self.itertag = [] #all the current value use + self.cross = [] # keep track of all the cross-section computed + self.param_order = [] + + def __iter__(self): + """generate the next param_card (in a abstract way) related to the scan. + Technically this generates only the generator.""" + + if hasattr(self, 'iterator'): + return self.iterator + self.iterator = self.iterate() + return self.iterator + + def write(self, path): + self.__iter__.write(path) + + def next(self, autostart=False): + """call the next iteration value""" + try: + iterator = self.iterator + except: + if autostart: + iterator = self.__iter__() + else: + raise + try: + out = next(iterator) + except StopIteration: + del self.iterator + raise + return out + + def iterate(self): + """create the actual generator""" + all_iterators = {} # dictionary of key -> block of object to scan [([param, [values]), ...] 
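(Aside on the get_idbmup rework above and the matching setrun.f hunk earlier in this diff: beams that are not a single proton or neutron are now reported with the PDG nuclear code convention 10LZZZAAAI, i.e. 1000000000 + 10000*Z + 10*A, with the sign following the beam direction. A quick self-contained check of that arithmetic; the function name below is hypothetical.)

    import math

    def idbmup(lpp, nb_proton=1, nb_neutron=0):
        """PDG beam id: (anti)proton/neutron for simple beams, 10LZZZAAAI for ions."""
        if abs(lpp) in (1, 2):
            if (nb_proton, nb_neutron) == (1, 0):
                target = 2212                               # proton
            elif (nb_proton, nb_neutron) == (0, 1):
                target = 2112                               # neutron
            else:
                target = (1000000000
                          + 10000 * nb_proton               # Z
                          + 10 * (nb_proton + nb_neutron))  # A
            return int(math.copysign(target, lpp))
        return {3: 11, -3: -11}.get(lpp, 0)                 # e-/e+ beams (lpp=+-3)

    print(idbmup(1))           # 2212
    print(idbmup(-1))          # -2212
    print(idbmup(1, 82, 126))  # 1000822080, i.e. a 208Pb ion beam
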
+ pattern = re.compile(r'''scan\s*(?P<id>\d*)\s*:\s*(?P<value>[^#]*)''', re.I) + + # fill all_iterators with the run_card information + for name in self.run_card.scan_set: + value = self.run_card[name] + try: + key, def_list = pattern.findall(value)[0] + except Exception as error: + misc.sprint(error) + raise Exception("Fail to handle scanning tag in run_card: Please check that the syntax is valid") + if key == '': + key = -1 * len(all_iterators) + if key not in all_iterators: + all_iterators[key] = [] + try: + all_iterators[key].append( (name, eval(def_list))) + except SyntaxError as error: + raise Exception("Fail to handle your scan definition. Please check your syntax:\n entry: %s \n Error reported: %s" %(def_list, error)) + + #prepare to keep track of parameter changing for the report + keys = list(all_iterators.keys()) # need to fix an order for the scan + #store the type of parameter + for key in keys: + for param, values in all_iterators[key]: + self.param_order.append("run_card#%s" % (param)) + + # do the loop + lengths = [list(range(len(all_iterators[key][0][1]))) for key in keys] + from functools import reduce + total = reduce((lambda x, y: x * y),[len(x) for x in lengths]) + for i,positions in enumerate(itertools.product(*lengths)): + self.itertag = [] + if self.logging: + logger.info("Create the next run_card in the scan definition (%s/%s) " %( i+1, total), '$MG:BOLD') + for i, pos in enumerate(positions): + key = keys[i] + for param, values in all_iterators[key]: + # assign the value in the card. + self.run_card[param] = values[pos] + self.itertag.append(values[pos]) + if self.logging: + logger.info("change parameter %s to %s", \ + param, values[pos]) + + + # return the current run_card up to next iteration + yield self.run_card + + + def store_entry(self, run_name, cross, error=None, run_card_path=None): + """store the value of the cross-section""" + + if isinstance(cross, dict): + info = dict(cross) + info.update({'bench' : self.itertag, 'run_name': run_name}) + self.cross.append(info) + else: + if error is None: + self.cross.append({'bench' : self.itertag, 'run_name': run_name, 'cross(pb)':cross}) + else: + self.cross.append({'bench' : self.itertag, 'run_name': run_name, 'cross(pb)':cross, 'error(pb)':error}) + + + def write_summary(self, path, order=None, lastline=False, nbcol=20): + """ """ + + if path: + ff = open(path, 'w') + path_events = path.rsplit("/", 1)[0] + #identCard = open(pjoin(path.rsplit("/", 2)[0], "Cards", "ident_card.dat")) + #identLines = identCard.readlines() + #identCard.close() + else: + ff = StringIO.StringIO() + if order: + keys = order + else: + keys = list(self.cross[0].keys()) + if 'bench' in keys: keys.remove('bench') + if 'run_name' in keys: keys.remove('run_name') + keys.sort() + if 'cross(pb)' in keys: + keys.remove('cross(pb)') + keys.append('cross(pb)') + if 'error(pb)' in keys: + keys.remove('error(pb)') + keys.append('error(pb)') + + formatting = "#%s%s%s\n" %('%%-%is ' % (nbcol-1), ('%%-%is ' % (nbcol))* len(self.param_order), + ('%%-%is ' % (nbcol))* len(keys)) + # header + if not lastline: + ff.write(formatting % tuple(['run_name'] + self.param_order + keys)) + formatting = "%s%s%s\n" %('%%-%is ' % (nbcol), ('%%-%ie ' % (nbcol))* len(self.param_order), + ('%%-%ie ' % (nbcol))* len(keys)) + + if not lastline: + to_print = self.cross + else: + to_print = self.cross[-1:] + for info in to_print: + name = info['run_name'] + bench = info['bench'] + data = [] + for k in keys: + if k in info: + data.append(info[k]) + else: + data.append(0.)
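(Aside on the iterate() generator above: parameters that share an explicit 'scan N:' index are grouped under one key and advance together, each anonymous 'scan:' entry gets a fresh negative key of its own, and itertools.product then walks the full grid. The condensed sketch below reproduces that grouping; expand_scans is a hypothetical helper, whereas the real code mutates self.run_card in place and logs each point.)

    import itertools, re

    PATTERN = re.compile(r'scan\s*(?P<id>\d*)\s*:\s*(?P<value>[^#]*)', re.I)

    def expand_scans(card):
        """Yield one concrete card per scan point (grouped scans move together)."""
        groups = {}
        for name, raw in card.items():
            m = PATTERN.match(raw) if isinstance(raw, str) else None
            if not m:
                continue
            key = m.group('id') or -len(groups)  # fresh negative key for anonymous scans
            groups.setdefault(key, []).append((name, eval(m.group('value'))))
        keys = list(groups)
        lengths = [range(len(groups[k][0][1])) for k in keys]
        for positions in itertools.product(*lengths):
            point = dict(card)
            for k, pos in zip(keys, positions):
                for name, values in groups[k]:
                    point[name] = values[pos]
            yield point

    card = {'ebeam1': 'scan1:[3000, 6500]', 'ebeam2': 'scan1:[3000, 6500]',
            'ptj': 'scan:[10., 20., 30.]'}
    for point in expand_scans(card):  # 2 x 3 = 6 run cards
        print(point)
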
+ ff.write(formatting % tuple([name] + bench + data)) + ff_single = open(pjoin(path_events, name, "params.dat"), "w") + for i_bench in range(0, len(bench)): + ff_single.write(self.param_order[i_bench] + " = " + str(bench[i_bench]) +"\n") + ff_single.close() + + if not path: + return ff.getvalue() + + + def get_next_name(self, run_name): + """returns a smart name for the next run""" + + if '_' in run_name: + name, value = run_name.rsplit('_',1) + if value.isdigit(): + return '%s_%02i' % (name, float(value)+1) + # no valid '_' in the name + return '%s_scan_02' % run_name diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/check_param_card.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/check_param_card.py index 71089d7480..bc785b5de6 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/check_param_card.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/check_param_card.py @@ -649,7 +649,7 @@ def write_inc_file(self, outpath, identpath, default, need_mp=False): #check if we need to write the value of scale for some block if os.path.exists(input_inc): text = open(input_inc).read() - scales = list(set(re.findall('mdl__(\w*)__scale', text, re.I))) + scales = list(set(re.findall(r'mdl__(\w*)__scale', text, re.I))) else: scales = [] @@ -1000,10 +1000,12 @@ def iterate(self): self.param_order.append("%s#%s" % (param.lhablock, '_'.join(repr(i) for i in param.lhacode))) # do the loop lengths = [list(range(len(all_iterators[key][0][1]))) for key in keys] - for positions in itertools.product(*lengths): + from functools import reduce + total = reduce((lambda x, y: x * y),[len(x) for x in lengths]) + for i,positions in enumerate(itertools.product(*lengths)): self.itertag = [] if self.logging: - logger.info("Create the next param_card in the scan definition", '$MG:BOLD') + logger.info("Create the next param_card in the scan definition (%s/%s)" % (i+1,total), '$MG:BOLD') for i, pos in enumerate(positions): key = keys[i] for param, values in all_iterators[key]: diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/cluster.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/cluster.py index 9a893f630d..1ad860e04f 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/cluster.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/cluster.py @@ -646,7 +646,10 @@ def worker(self): if os.path.exists(exe) and not exe.startswith('/'): exe = './' + exe if isinstance(opt['stdout'],str): - opt['stdout'] = open(opt['stdout'],'w') + if opt['stdout'] == '/dev/null': + opt['stdout'] = os.open(os.devnull, os.O_RDWR) + else: + opt['stdout'] = open(opt['stdout'],'w') if opt['stderr'] == None: opt['stderr'] = subprocess.STDOUT if arg: @@ -671,11 +674,12 @@ def worker(self): self.pids.put(pid) # the function should return 0 if everything is fine # the error message otherwise - returncode = exe(*arg, **opt) - if returncode != 0: - logger.warning("fct %s does not return 0. Stopping the code in a clean way. The error was:\n%s", exe, returncode) + try: + returncode = exe(*arg, **opt) + except Exception as error: + #logger.warning("fct %s does not return 0. Stopping the code in a clean way. 
The error was:\n%s", exe, returncode) self.stoprequest.set() - self.remove("fct %s does not return 0:\n %s" % (exe, returncode)) + self.remove("fct %s does raise %s\n %s" % (exe, error)) except Exception as error: self.fail_msg = sys.exc_info() logger.warning(str(error)) @@ -700,7 +704,7 @@ def worker(self): def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, - log=None, required_output=[], nb_submit=0): + log=None, required_output=[], nb_submit=0, python_opts={}): """submit a job on multicore machine""" # open threads if needed @@ -720,7 +724,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, return tag else: # python function - self.queue.put((tag, prog, argument, {})) + self.queue.put((tag, prog, argument, python_opts)) self.submitted.put(1) return tag @@ -908,6 +912,10 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None else: requirement = '' + if 'cluster_walltime' in self.options and self.options['cluster_walltime']\ + and self.options['cluster_walltime'] != 'None': + requirement+='\n MaxRuntime = %s' % self.options['cluster_walltime'] + if cwd is None: cwd = os.getcwd() if stdout is None: @@ -936,7 +944,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None #Submitting job(s). #Logging submit event(s). #1 job(s) submitted to cluster 2253622. - pat = re.compile("submitted to cluster (\d*)",re.MULTILINE) + pat = re.compile(r"submitted to cluster (\d*)",re.MULTILINE) output = output.decode(errors='ignore') try: id = pat.search(output).groups()[0] @@ -1025,7 +1033,7 @@ def submit2(self, prog, argument=[], cwd=None, stdout=None, stderr=None, #Logging submit event(s). #1 job(s) submitted to cluster 2253622. output = output.decode(errors='ignore') - pat = re.compile("submitted to cluster (\d*)",re.MULTILINE) + pat = re.compile(r"submitted to cluster (\d*)",re.MULTILINE) try: id = pat.search(output).groups()[0] except: @@ -1588,7 +1596,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None output = a.communicate()[0].decode(errors='ignore') #Your job 874511 ("test.sh") has been submitted - pat = re.compile("Your job (\d*) \(",re.MULTILINE) + pat = re.compile(r"Your job (\d*) \(",re.MULTILINE) try: id = pat.search(output).groups()[0] except: @@ -1606,7 +1614,7 @@ def control_one_job(self, id): if not status: return 'F' #874516 0.00000 test.sh alwall qw 03/04/2012 22:30:35 1 - pat = re.compile("^(\d+)\s+[\d\.]+\s+[\w\d\.]+\s+[\w\d\.]+\s+(\w+)\s") + pat = re.compile(r"^(\d+)\s+[\d\.]+\s+[\w\d\.]+\s+[\w\d\.]+\s+(\w+)\s") stat = '' for line in status.stdout.read().decode(errors='ignore').split('\n'): if not line: @@ -1636,7 +1644,7 @@ def control(self, me_dir=None): cmd = 'qstat -s %s' % statusflag status = misc.Popen([cmd], shell=True, stdout=subprocess.PIPE) #874516 0.00000 test.sh alwall qw 03/04/2012 22:30:35 1 - pat = re.compile("^(\d+)") + pat = re.compile(r"^(\d+)") for line in status.stdout.read().decode(errors='ignore').split('\n'): line = line.strip() try: @@ -1715,6 +1723,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None stderr = stdout if log is None: log = '/dev/null' + command = ['sbatch', '-o', stdout, '-J', me_dir, @@ -1726,6 +1735,12 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None command.insert(1, '-p') command.insert(2, self.cluster_queue) + if 'cluster_walltime' in self.options and self.options['cluster_walltime']\ + and self.options['cluster_walltime'] != 'None': + 
command.insert(1, '-t') + command.insert(2, self.options['cluster_walltime']) + + a = misc.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, @@ -1736,7 +1751,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None id = output_arr[3].rstrip() if not id.isdigit(): - id = re.findall('Submitted batch job ([\d\.]+)', ' '.join(output_arr)) + id = re.findall(r'Submitted batch job ([\d\.]+)', ' '.join(output_arr)) if not id or len(id)>1: raise ClusterManagmentError( 'fail to submit to the cluster: \n%s' \ diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/combine_runs.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/combine_runs.py index 4de6b84ec0..b1e8c88eac 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/combine_runs.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/combine_runs.py @@ -20,6 +20,7 @@ from __future__ import absolute_import import math import os +import shutil import re import logging from six.moves import range @@ -117,6 +118,7 @@ def sum_multichannel(self, channel): #Now read in all of the events and write them #back out with the appropriate scaled weight + to_clean = [] fsock = open(pjoin(channel, 'events.lhe'), 'w') wgt = results.axsec / results.nunwgt tot_nevents, nb_file = 0, 0 @@ -129,8 +131,14 @@ def sum_multichannel(self, channel): nw = self.copy_events(fsock, pjoin(path,'events.lhe'), wgt) tot_nevents += nw nb_file += 1 + to_clean.append(path) logger.debug("Combined %s files generating %s events for %s " , nb_file, tot_nevents, channel) - + for path in to_clean: + try: + shutil.rmtree(path) + except Exception as error: + pass + @staticmethod def get_fortran_str(nb): data = '%E' % nb @@ -162,6 +170,7 @@ def copy_events(self, fsock, input, new_wgt): fsock.write(line) old_line = line return nb_evt + def get_channels(self, proc_path): """Opens file symfact.dat to determine all channels""" sympath = os.path.join(proc_path, 'symfact.dat') diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/common_run_interface.py index 9bd9d9cb50..194f0cdfbd 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/common_run_interface.py @@ -23,6 +23,7 @@ import ast import logging import math +import copy import os import re import shutil @@ -181,6 +182,23 @@ def help_add_time_of_flight(self): logger.info(' threshold option allows to change the minimal value required to') logger.info(' a non zero value for the particle (default:1e-12s)') + def help_print_results(self): + logger.info("syntax: print_results [RUN_NAME] [OPTIONS]") + logger.info("-- print the results of the previous run on the screen") + logger.info(" If no RUN_NAME is provided, the information of all runs") + logger.info(" is printed one after another.") + logger.info("") + logger.info(" supported options:") + logger.info(" ------------------") + logger.info(" --format=full|short # default is full") + logger.info(" full format contains banner/... 
") + logger.info(" while short is a simple multi-column format (nice for plotting)") + logger.info(" --path=") + logger.info(" allows writing the information to a file.") + logger.info(" --mode=w|a #default is w ") + logger.info(" when the information is printed to a file, you can choose ") + logger.info(" to either overwrite the file if it already exists (w mode)") + logger.info(" or to append the information at the end of the file (a mode)") class CheckValidForCmd(object): @@ -727,7 +745,7 @@ def __init__(self, me_dir, options, *args, **opts): if not self.proc_characteristics['ninitial']: # Get number of initial states nexternal = open(pjoin(self.me_dir,'Source','nexternal.inc')).read() - found = re.search("PARAMETER\s*\(NINCOMING=(\d)\)", nexternal) + found = re.search(r"PARAMETER\s*\(NINCOMING=(\d)\)", nexternal) self.ninitial = int(found.group(1)) else: self.ninitial = self.proc_characteristics['ninitial'] @@ -989,7 +1007,22 @@ def do_treatcards(self, line, amcatnlo=False): #raise Exception, "%s %s %s" % (sys.path, os.path.exists(pjoin(self.me_dir,'bin','internal', 'ufomodel')), os.listdir(pjoin(self.me_dir,'bin','internal', 'ufomodel'))) import ufomodel as ufomodel zero = ufomodel.parameters.ZERO - if self.proc_characteristics['nlo_mixed_expansion']: + no_width = [] + + if self.proc_characteristics['ew_sudakov']: + # if the sudakov approximation is used, force all particle widths to zero + # unless the complex mass scheme is used + if not self.proc_characteristics['complex_mass_scheme']: + no_width = [p for p in ufomodel.all_particles if p.width != zero] + logger.info('''Setting all particle widths to zero (needed for EW Sudakov approximation).''','$MG:BOLD') + # also, check that the model features the 'ntadpole' parameter, and set it to 1 + try: + param_card['tadpole'].get(1).value = 1. + logger.info('''Setting the value of ntadpole to 1 (needed for EW Sudakov approximation).''','$MG:BOLD') + except KeyError: + logger.warning('''The model has no 'ntadpole' parameter. 
The Sudakov approximation for EW corrections may give wrong results.''') + + elif self.proc_characteristics['nlo_mixed_expansion']: no_width = [p for p in ufomodel.all_particles if (str(p.pdg_code) in pids or str(-p.pdg_code) in pids) and p.width != zero] @@ -1168,17 +1201,17 @@ def detect_card_type(path): 'Begin Minpts', 'gridpack', 'ebeam1', - 'block\s+mw_run', + r'block\s+mw_run', 'BLOCK', 'DECAY', 'launch', 'madspin', - 'transfer_card\.dat', + r'transfer_card\.dat', 'set', 'main:numberofevents', # pythia8, '@MG5aMC skip_analysis', #MA5 --both-- - '@MG5aMC\s*inputs\s*=\s*\*\.(?:hepmc|lhe)', #MA5 --both-- - '@MG5aMC\s*reconstruction_name', # MA5 hadronique + r'@MG5aMC\s*inputs\s*=\s*\*\.(?:hepmc|lhe)', #MA5 --both-- + r'@MG5aMC\s*reconstruction_name', # MA5 hadronique '@MG5aMC', # MA5 hadronique 'run_rivet_later', # Rivet ] @@ -1237,7 +1270,7 @@ def detect_card_type(path): return 'madspin_card.dat' if 'decay' in text: # need to check if this a line like "decay w+" or "set decay" - if re.search("(^|;)\s*decay", fulltext, re.M): + if re.search(r"(^|;)\s*decay", fulltext, re.M): return 'madspin_card.dat' else: return 'reweight_card.dat' @@ -2074,6 +2107,12 @@ def check_multicore(self): # ensure that the run_card is present if not hasattr(self, 'run_card'): self.run_card = banner_mod.RunCard(pjoin(self.me_dir, 'Cards', 'run_card.dat')) + # Below reads the run_card in the LHE file rather than the Cards/run_card + import madgraph.various.lhe_parser as lhe_parser + args_path = list(args) + self.check_decay_events(args_path) + self.run_card = banner_mod.RunCard(lhe_parser.EventFile(args_path[0]).get_banner()['mgruncard']) + # we want to run this in a separate shell to avoid hard f2py crash command = [sys.executable] @@ -2085,6 +2124,12 @@ def check_multicore(self): command.append('--web') command.append('reweight') + ## TV: copy the event file as backup before starting reweighting + event_path = pjoin(self.me_dir, 'Events', self.run_name, 'events.lhe.gz') + event_path_backup = pjoin(self.me_dir, 'Events', self.run_name, 'events_orig.lhe.gz') + if os.path.exists(event_path) and not os.path.exists(event_path_backup): + shutil.copyfile(event_path, event_path_backup) + ######### START SINGLE CORE MODE ############ if self.options['nb_core']==1 or self.run_card['nevents'] < 101 or not check_multicore(self): if self.run_name: @@ -2200,7 +2245,7 @@ def check_multicore(self): cross_sections[key] = value / (nb_event+1) lhe.remove() for key in cross_sections: - if key == 'orig' or key.isdigit(): + if key == 'orig' or (key.isdigit() and not (key[0] == '2')): continue logger.info('%s : %s pb' % (key, cross_sections[key])) return @@ -2461,7 +2506,7 @@ def do_add_time_of_flight(self, line): ############################################################################ def do_print_results(self, line): - """Not in help:Print the cross-section/ number of events for a given run""" + """Print the cross-section/ number of events for a given run""" args = self.split_arg(line) options={'path':None, 'mode':'w', 'format':'full'} @@ -2942,10 +2987,15 @@ def do_rivet(self, line, postprocess=False): #2 Prepare Rivet setup environments rivet_path = self.options['rivet_path'] yoda_path = self.options['yoda_path'] + fastjet_path = subprocess.Popen([self.options['fastjet'], '--prefix'], + stdout = subprocess.PIPE).stdout.read().decode(errors='ignore').strip() + set_env = set_env + "export PATH={0}:$PATH\n".format(pjoin(rivet_path, 'bin')) set_env = set_env + "export PATH={0}:$PATH\n".format(pjoin(yoda_path, 'bin')) set_env = 
set_env + "export LD_LIBRARY_PATH={0}:{1}:$LD_LIBRARY_PATH\n".format(pjoin(rivet_path, 'lib'), pjoin(rivet_path, 'lib64')) set_env = set_env + "export LD_LIBRARY_PATH={0}:{1}:$LD_LIBRARY_PATH\n".format(pjoin(yoda_path, 'lib'), pjoin(yoda_path, 'lib64')) + set_env = set_env + "export LD_LIBRARY_PATH={0}:$LD_LIBRARY_PATH\n".format(pjoin(self.options['hepmc_path'], 'lib')) + set_env = set_env + "export LD_LIBRARY_PATH={0}:$LD_LIBRARY_PATH\n".format(pjoin(fastjet_path, 'lib')) major, minor = sys.version_info[0:2] set_env = set_env + "export PYTHONPATH={0}:{1}:$PYTHONPATH\n".format(pjoin(rivet_path, 'lib', 'python%s.%s' %(major,minor), 'site-packages'),\ pjoin(rivet_path, 'lib64', 'python%s.%s' %(major,minor), 'site-packages')) @@ -4311,8 +4361,8 @@ def complete_compute_widths(self, text, line, begidx, endidx, formatting=True): else: completion = {} completion['options'] = self.list_completion(text, - ['--path=', '--output=', '--min_br=0.\$', '--nlo', - '--precision_channel=0.\$', '--body_decay=']) + ['--path=', '--output=', r'--min_br=0.\$', '--nlo', + r'--precision_channel=0.\$', '--body_decay=']) return self.deal_multiple_categories(completion, formatting) @@ -4821,9 +4871,9 @@ class AskforEditCard(cmd.OneLinePathCompletion): (return False to repeat the question) """ - all_card_name = ['param_card', 'run_card', 'pythia_card', 'pythia8_card', + all_card_name = ['param_card', 'run_card', 'pythia_card', 'pythia8_card', 'fo_analysis_card' 'madweight_card', 'MadLoopParams', 'shower_card', 'rivet_card'] - to_init_card = ['param', 'run', 'madweight', 'madloop', + to_init_card = ['param', 'run', 'madweight', 'madloop', 'fo_analysis', 'shower', 'pythia8','delphes','madspin', 'rivet'] special_shortcut = {} special_shortcut_help = {} @@ -4853,6 +4903,7 @@ def load_default(self): self.has_PY8 = False self.has_delphes = False self.has_rivet = False + self.has_fo_card = False self.paths = {} self.update_block = [] @@ -5058,7 +5109,8 @@ def init_run(self, cards): try: - self.run_card = banner_mod.RunCard(self.paths['run'], consistency='warning') + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + self.run_card = banner_mod.RunCard(self.paths['run'], consistency='warning') except IOError: self.run_card = {} try: @@ -5248,6 +5300,15 @@ def init_delphes(self, cards): self.has_delphes = True return [] + def init_fo_analysis(self, cards): + self.has_fo_card = False + if not self.get_path('FO_analyse', cards): + return [] + self.has_fo_card = True + self.fo_card = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse']) + self.fo_card_def = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse_default']) + return list(self.fo_card.string_vars) + def set_CM_velocity(self, line): """compute sqrts from the velocity in the center of mass frame""" @@ -5508,6 +5569,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed['delphes_card'] = '' if self.has_rivet: allowed['rivet_card'] = '' + if self.has_fo_card: + allowed['fo_card'] = '' elif len(args) == 2: if args[1] == 'run_card': @@ -5532,6 +5595,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed = {'delphes_card':'default'} elif args[1] == 'rivet_card': allowed = {'rivet_card':'default'} + elif args[1] == 'fo_card': + allowed = {'fo_card':'default'} else: allowed = {'value':''} @@ -5539,6 +5604,7 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): start = 1 if args[1] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', 
'MadLoop_card','pythia8_card','delphes_card','plot_card', + 'fo_card', 'madanalysis5_parton_card','madanalysis5_hadron_card', 'rivet_card']: start = 2 @@ -5576,6 +5642,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): categories.append('delphes_card') if self.has_rivet: categories.append('rivet_card') + if self.has_fo_card: + categories.append('fo_card') possibilities['category of parameter (optional)'] = \ self.list_completion(text, categories) @@ -5630,7 +5698,13 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): if 'delphes_card' in allowed: if allowed['delphes_card'] == 'default': opts = ['default', 'atlas', 'cms'] - possibilities['Delphes Card'] = self.list_completion(text, opts) + possibilities['Delphes Card'] = self.list_completion(text, opts) + + if 'fo_card' in allowed: + opts = self.fo_card.string_vars + if allowed['fo_card'] == 'default': + opts.append('default') + possibilities['FO Card'] = self.list_completion(text, opts) if 'value' in list(allowed.keys()): opts = ['default', 'scale'] @@ -5733,21 +5807,28 @@ def do_set(self, line): if args[0] in self.special_shortcut: targettypes , cmd = self.special_shortcut[args[0]] if len(args) != len(targettypes) +1: - logger.warning('shortcut %s requires %s argument' % (args[0], len(targettypes))) - if len(args) < len(targettypes) +1: - return + if len(targettypes) == 1 and args[len(targettypes)].startswith('scan'): + args = args[:len(targettypes)] + [' '.join(args[len(targettypes):])] + targettypes = [str] else: - logger.warning('additional argument will be ignored') + logger.warning('shortcut %s requires %s argument' % (args[0], len(targettypes))) + if len(args) < len(targettypes) +1: + return + else: + logger.warning('additional argument will be ignored') values ={} for i, argtype in enumerate(targettypes): try: - values = {str(i): banner_mod.ConfigFile.format_variable(args[i+1], argtype, args[0])} + values[str(i)] = banner_mod.ConfigFile.format_variable(args[i+1], argtype, args[0]) except ValueError as e: logger.warning("Wrong argument: The entry #%s should be of type %s.", i+1, argtype) return except InvalidCmd as e: - logger.warning(str(e)) - return + if isinstance(args[i+1], str) and args[i+1].startswith('scan'): + values[str(i)] = args[i+1] + else: + logger.warning(str(e)) + return #else: # logger.warning("too many argument for this command") # return @@ -5787,7 +5868,7 @@ def do_set(self, line): if os.path.exists(pythia_path): logger.info('add line QCUT = %s in pythia_card.dat' % args[1]) p_card = open(pythia_path,'r').read() - p_card, n = re.subn('''^\s*QCUT\s*=\s*[\de\+\-\.]*\s*$''', + p_card, n = re.subn(r'''^\s*QCUT\s*=\s*[\de\+\-\.]*\s*$''', ''' QCUT = %s ''' % args[1], \ p_card, flags=(re.M+re.I)) if n==0: @@ -5801,7 +5882,7 @@ def do_set(self, line): if os.path.exists(pythia_path): logger.info('add line SHOWERKT = %s in pythia_card.dat' % args[1].upper()) p_card = open(pythia_path,'r').read() - p_card, n = re.subn('''^\s*SHOWERKT\s*=\s*[default\de\+\-\.]*\s*$''', + p_card, n = re.subn(r'''^\s*SHOWERKT\s*=\s*[default\de\+\-\.]*\s*$''', ''' SHOWERKT = %s ''' % args[1].upper(), \ p_card, flags=(re.M+re.I)) if n==0: @@ -5856,7 +5937,7 @@ def do_set(self, line): pjoin(self.me_dir,'Cards', 'delphes_card.dat')) return - if args[0] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', + if args[0] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', 'fo_card', 'delphes_card','madanalysis5_hadron_card','madanalysis5_parton_card','rivet_card']: if args[1] == 'default': 
@@ -6176,6 +6257,22 @@ def do_set(self, line): self.setRivet(args[start], value, default=default) self.rivet_card.write(self.paths['rivet'], self.paths['rivet_default']) + elif self.has_fo_card and (card in ['', 'fo_card'])\ + and args[start].lower() in [k.lower() for k in self.fo_card.string_vars]: + + if args[start] in self.conflict and card == '': + text = 'ambiguous name (present in more than one card). Please specify which card to edit' + logger.warning(text) + return + if args[start+1] == 'default': + value = self.fo_card_def[args[start]] + default = True + else: + value = args[start+1] + default = False + self.fo_card[args[start]] = value + self.modified_card.add('fo_card') + #INVALID -------------------------------------------------------------- else: logger.warning('invalid set command %s ' % line) @@ -6222,12 +6319,13 @@ def setM(self, block, name, value): def setR(self, name, value): - if self.mother_interface.inputfile: - self.run_card.set(name, value, user=True, raiseerror=True) - else: - self.run_card.set(name, value, user=True) - new_value = self.run_card.get(name) - logger.info('modify parameter %s of the run_card.dat to %s' % (name, new_value),'$MG:BOLD') + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + if self.mother_interface.inputfile: + self.run_card.set(name, value, user=True, raiseerror=True) + else: + self.run_card.set(name, value, user=True) + new_value = self.run_card.get(name) + logger.info('modify parameter %s of the run_card.dat to %s' % (name, new_value),'$MG:BOLD') def setML(self, name, value, default=False): @@ -6314,6 +6412,7 @@ def check_card_consistency(self): proc_charac = self.mother_interface.proc_characteristics if proc_charac['grouped_matrix'] and \ + isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int) and \ abs(self.run_card['lpp1']) == 1 == abs(self.run_card['lpp2']) and \ (self.run_card['nb_proton1'] != self.run_card['nb_proton2'] or self.run_card['nb_neutron1'] != self.run_card['nb_neutron2'] or @@ -6403,41 +6502,42 @@ def check_card_consistency(self): # check that only quark/gluon/photon are in initial beam if lpp=+-1 pdg_in_p = list(range(-6,7))+[21,22] - if (abs(self.run_card['lpp1'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial1'])) \ + if isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int): + if(abs(self.run_card['lpp1'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial1'])) \ or (abs(self.run_card['lpp2'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial2'])): - if 'pythia_card.dat' in self.cards or 'pythia8_card.dat' in self.cards: - path_to_remove = None - if 'pythia_card.dat' in self.cards: - path_to_remove = self.paths['pythia'] - card_to_remove = 'pythia_card.dat' - elif 'pythia8_card.dat' in self.cards: - path_to_remove = self.paths['pythia8'] - card_to_remove = 'pythia8_card.dat' - if path_to_remove: - if 'partonshower' in self.run_card['bypass_check']: + if 'pythia_card.dat' in self.cards or 'pythia8_card.dat' in self.cards: + path_to_remove = None + if 'pythia_card.dat' in self.cards: + path_to_remove = self.paths['pythia'] + card_to_remove = 'pythia_card.dat' + elif 'pythia8_card.dat' in self.cards: + path_to_remove = self.paths['pythia8'] + card_to_remove = 'pythia8_card.dat' + if path_to_remove: + if 'partonshower' in self.run_card['bypass_check']: + logger.warning("forcing to keep parton-shower run while possibly not fully consistent... 
please be carefull") + else: + logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.') + os.remove(path_to_remove) + self.cards.remove(card_to_remove) + else: + logger.info('Remember that Parton-Shower are not yet ready for such proton component definition (HW implementation in progress).', '$MG:BOLD' ) + elif (abs(self.run_card['lpp1'])==3 and abs(self.run_card['lpp2'])==3): + if 'pythia8_card.dat' in self.cards: + if self.run_card['pdlabel'] == 'isronlyll': + if 'partonshower' not in self.run_card['bypass_check']: + # force that QED shower is on? + for param in ['TimeShower:QEDshowerByQ', 'TimeShower:QEDshowerByL', 'TimeShower:QEDshowerByGamma', 'SpaceShower:QEDshowerByQ', 'SpaceShower:QEDshowerByL']: + if param not in self.PY8Card or \ + (not self.PY8Card[param] and param.lower() not in self.PY8Card.user_set): + logger.warning('Activating QED shower: setting %s to True', param) + self.PY8Card[param] = True + elif 'partonshower' in self.run_card['bypass_check']: logger.warning("forcing to keep parton-shower run while possibly not fully consistent... please be carefull") - else: + else: logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.') - os.remove(path_to_remove) - self.cards.remove(card_to_remove) - else: - logger.info('Remember that Parton-Shower are not yet ready for such proton component definition (HW implementation in progress).', '$MG:BOLD' ) - elif (abs(self.run_card['lpp1'])==3 and abs(self.run_card['lpp2'])==3): - if 'pythia8_card.dat' in self.cards: - if self.run_card['pdlabel'] == 'isronlyll': - if 'partonshower' not in self.run_card['bypass_check']: - # force that QED shower is on? - for param in ['TimeShower:QEDshowerByQ', 'TimeShower:QEDshowerByL', 'TimeShower:QEDshowerByGamma', 'SpaceShower:QEDshowerByQ', 'SpaceShower:QEDshowerByL']: - if param not in self.PY8Card or \ - (not self.PY8Card[param] and param.lower() not in self.PY8Card.user_set): - logger.warning('Activating QED shower: setting %s to True', param) - self.PY8Card[param] = True - elif 'partonshower' in self.run_card['bypass_check']: - logger.warning("forcing to keep parton-shower run while possibly not fully consistent... please be carefull") - else: - logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.') - os.remove(self.paths['pythia8']) - self.cards.remove('pythia8_card.dat') + os.remove(self.paths['pythia8']) + self.cards.remove('pythia8_card.dat') ######################################################################## @@ -6514,7 +6614,8 @@ def check_card_consistency(self): #check relation between lepton PDF // dressed lepton collisions // ... 
- if abs(self.run_card['lpp1']) != 1 or abs(self.run_card['lpp2']) != 1: + if isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int) and \ + (abs(self.run_card['lpp1']) != 1 or abs(self.run_card['lpp2']) != 1): if abs(self.run_card['lpp1']) == abs(self.run_card['lpp2']) == 3: # this can be dressed lepton or photon-flux if proc_charac['pdg_initial1'] in [[11],[-11]] and proc_charac['pdg_initial2'] in [[11],[-11]]: @@ -6732,7 +6833,11 @@ def write_card_param(self): """ write the param_card """ self.param_card.write(self.paths['param']) - + + def write_card_fo_card(self): + """ write the fo_card""" + self.fo_card.write_card_from_template(self.paths['FO_analyse'], self.paths['FO_analyse_default']) + @staticmethod def update_dependent(mecmd, me_dir, param_card, path ,timer=0, run_card=None, lhapdfconfig=None): @@ -7076,7 +7181,7 @@ def do_decay(self, line): #first find the particle particle = line.split('>')[0].strip() logger.info("change madspin_card to define the decay of %s: %s" %(particle, line.strip()), '$MG:BOLD') - particle = particle.replace('+','\+').replace('-','\-') + particle = particle.replace('+',r'\+').replace('-',r'\-') decay_pattern = re.compile(r"^\s*decay\s+%s\s*>[\s\w+-~]*?$" % particle, re.I+re.M) text= open(path).read() text = decay_pattern.sub('', text) @@ -7193,7 +7298,7 @@ def help_edit(self, prefix=True): logger.info( ' --clean remove all previously existing lines in the file') logger.info( ' --comment_line="<regular-expression>" comment all lines matching the regular expression') logger.info('') - logger.info(' Note: all regular-expression will be prefixed by ^\s*') + logger.info(r' Note: all regular-expression will be prefixed by ^\s*') logger.info('') logger.info( ' example: edit reweight --after_line="change mode\b" change model heft') logger.info( ' edit madspin --after_line="banner" change model XXXX') @@ -7314,7 +7419,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern=r'''replace_line=(?P<quote>["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[14:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[14:-1] for posline,l in enumerate(split): if re.search(pattern, l): break @@ -7344,7 +7449,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern=r'''comment_line=(?P<quote>["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[14:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[14:-1] nb_mod = 0 for posline,l in enumerate(split): if re.search(pattern, l): @@ -7366,7 +7471,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern=r'''before_line=(?P<quote>["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[13:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[13:-1] for posline,l in enumerate(split): if re.search(pattern, l): break @@ -7383,7 +7488,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern = r'''after_line=(?P<quote>["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[12:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[12:-1] for posline,l in enumerate(split): if re.search(pattern, l): break @@ -7527,16 +7632,19 @@ def open_file(self, answer): answer = 'plot' else: answer = self.cards[int(answer)-self.integer_bias] - + path = '' if 'madweight' in answer: answer = answer.replace('madweight', 'MadWeight') elif 
'MadLoopParams' in answer: answer = self.paths['ML'] elif 'pythia8_card' in answer: answer = self.paths['pythia8'] + elif 'FO_analyse' in answer: + path = self.paths['FO_analyse'] + answer = 'fo_card' if os.path.exists(answer): path = answer - else: + elif not os.path.exists(path): if not '.dat' in answer and not '.lhco' in answer: if answer != 'trigger': path = self.paths[answer] @@ -7595,7 +7703,8 @@ def reload_card(self, path): logger.error('Please re-open the file and fix the problem.') logger.warning('using the \'set\' command without opening the file will discard all your manual change') elif path == self.paths['run']: - self.run_card = banner_mod.RunCard(path) + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + self.run_card = banner_mod.RunCard(path) elif path == self.paths['shower']: self.shower_card = shower_card_mod.ShowerCard(path) elif path == self.paths['ML']: @@ -7614,6 +7723,8 @@ def reload_card(self, path): except: import internal.madweight.Cards as mwcards self.mw_card = mwcards.Card(path) + elif path == self.paths['FO_analyse']: + self.fo_card = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse']) else: logger.debug('not keep in sync: %s', path) return path @@ -7629,6 +7740,9 @@ def scanparamcardhandling(input_path=lambda obj: pjoin(obj.me_dir, 'Cards', 'par iteratorclass=param_card_mod.ParamCardIterator, summaryorder=lambda obj: lambda:None, check_card=lambda obj: CommonRunCmd.static_check_param_card, + run_card_scan=False, + run_card_input= lambda obj: pjoin(obj.me_dir, 'Cards', 'run_card.dat'), + run_card_iteratorclass=banner_mod.RunCardIterator, ): """ This is a decorator for customizing/using scan over the param_card (or technically other) This should be use like this: @@ -7678,7 +7792,60 @@ def __enter__(self): def __exit__(self, ctype, value, traceback ): self.iterator.write(self.path) - def decorator(original_fct): + def scan_over_run_card(original_fct, obj, *args, **opts): + + if isinstance(input_path, str): + card_path = run_card_input + else: + card_path = run_card_input(obj) + + run_card_iterator = run_card_iteratorclass(card_path) + orig_card = copy.deepcopy(run_card_iterator.run_card) + if not run_card_iterator.run_card.scan_set: + return original_fct(obj, *args, **opts) + + + with restore_iterator(orig_card, card_path): + # this with statement ensure that the original card is restore + # whatever happens inside those block + + if not hasattr(obj, 'allow_notification_center'): + obj.allow_notification_center = False + with misc.TMP_variable(obj, 'allow_notification_center', False): + orig_name = get_run_name(obj) + if not orig_name and args[1]: + orig_name = args[1][0] + args = (args[0], args[1][1:]) + #orig_name = "scan_%s" % len(obj.results) + + try: + os.mkdir(pjoin(obj.me_dir, 'Events', orig_name)) + except Exception: + pass + next_name = orig_name + "_00" + + for i,card in enumerate(run_card_iterator): + card.write(card_path) + # still have to check for the auto-wdith + #if i !=0: + next_name = run_card_iterator.get_next_name(next_name) + set_run_name(obj)(next_name) + try: + original_fct(obj, *args, **opts) + except ignoreerror as error: + run_card_iterator.store_entry(next_name, {'exception': error}) + else: + run_card_iterator.store_entry(next_name, store_for_scan(obj)(), run_card_path=card_path) + + #param_card_iterator.write(card_path) #-> this is done by the with statement + name = misc.get_scan_name(orig_name, next_name) + path = result_path(obj) % name + logger.info("write scan results in %s" % path ,'$MG:BOLD') + order = 
summaryorder(obj)() + run_card_iterator.write_summary(path, order=order) + + + def decorator(original_fct): def new_fct(obj, *args, **opts): if isinstance(input_path, str): @@ -7702,8 +7869,13 @@ def new_fct(obj, *args, **opts): if not param_card_iterator: #first run of the function - original_fct(obj, *args, **opts) - return + if run_card_scan: + scan_over_run_card(original_fct, obj, *args, **opts) + return + else: + #first run of the function + original_fct(obj, *args, **opts) + return with restore_iterator(param_card_iterator, card_path): # this with statement ensure that the original card is restore diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/extended_cmd.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/extended_cmd.py index 2f37070580..789976beee 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/extended_cmd.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/extended_cmd.py @@ -624,12 +624,12 @@ def complete(self, text, state): compfunc = self.completenames # correct wrong splittion with '\ ' - if line and begidx > 2 and line[begidx-2:begidx] == '\ ': + if line and begidx > 2 and line[begidx-2:begidx] == r'\ ': Ntext = line.split(os.path.sep)[-1] - self.completion_prefix = Ntext.rsplit('\ ', 1)[0] + '\ ' + self.completion_prefix = Ntext.rsplit(r'\ ', 1)[0] + r'\ ' to_rm = len(self.completion_prefix) - 1 Nbegidx = len(line.rsplit(os.path.sep, 1)[0]) + 1 - data = compfunc(Ntext.replace('\ ', ' '), line, Nbegidx, endidx) + data = compfunc(Ntext.replace(r'\ ', ' '), line, Nbegidx, endidx) self.completion_matches = [p[to_rm:] for p in data if len(p)>to_rm] # correct wrong splitting with '-'/"=" @@ -742,7 +742,7 @@ def path_completion(text, base_dir = None, only_dirs = False, completion += [prefix + f for f in ['.'+os.path.sep, '..'+os.path.sep] if \ f.startswith(text) and not prefix.startswith('.')] - completion = [a.replace(' ','\ ') for a in completion] + completion = [a.replace(' ',r'\ ') for a in completion] return completion @@ -1253,7 +1253,7 @@ def check_answer_in_input_file(self, question_instance, default, path=False, lin return possibility[0] if '=' in line and ' ' in line.strip(): leninit = len(line) - line,n = re.subn('\s*=\s*','=', line) + line,n = re.subn(r'\s*=\s*','=', line) if n and len(line) != leninit: return self.check_answer_in_input_file(question_instance, default, path=path, line=line) @@ -1311,7 +1311,7 @@ def nice_error_handling(self, error, line): if os.path.exists(self.debug_output): os.remove(self.debug_output) try: - super(Cmd,self).onecmd('history %s' % self.debug_output.replace(' ', '\ ')) + super(Cmd,self).onecmd('history %s' % self.debug_output.replace(' ', r'\ ')) except Exception as error: logger.error(error) @@ -2001,6 +2001,7 @@ def write_configuration(self, filepath, basefile, basedir, to_keep): text = "" has_mg5_path = False # Use local configuration => Need to update the path + already_written = set() for line in open(basefile): if '=' in line: data, value = line.split('=',1) @@ -2018,9 +2019,12 @@ def write_configuration(self, filepath, basefile, basedir, to_keep): comment = '' if key in to_keep: value = str(to_keep[key]) - else: + elif line not in already_written: + already_written.add(line) text += line continue + else: + continue if key == 'mg5_path': has_mg5_path = True try: @@ -2032,14 +2036,20 @@ def write_configuration(self, filepath, basefile, basedir, to_keep): # check if absolute path if not os.path.isabs(value): value = os.path.realpath(os.path.join(basedir, value)) - text += '%s = %s # %s \n' % (key, value, comment) + new_line 
= '%s = %s # %s \n' % (key, value, comment) + if new_line not in already_written: + text += new_line + already_written.add(new_line) for key in to_write: if key in to_keep: - text += '%s = %s \n' % (key, to_keep[key]) + new_line = '%s = %s \n' % (key, to_keep[key]) + if new_line not in already_written: + text += new_line if not MADEVENT and not has_mg5_path: - text += """\n# MG5 MAIN DIRECTORY\n""" - text += "mg5_path = %s\n" % MG5DIR + if "mg5_path = %s\n" % MG5DIR not in already_written: + text += """\n# MG5 MAIN DIRECTORY\n""" + text += "mg5_path = %s\n" % MG5DIR writer = open(filepath,'w') writer.write(text) @@ -2190,7 +2200,7 @@ def onecmd(self, line, **opt): raise def reask(self, reprint_opt=True): - pat = re.compile('\[(\d*)s to answer\]') + pat = re.compile(r'\[(\d*)s to answer\]') prev_timer = signal.alarm(0) # avoid timer if any if prev_timer: @@ -2991,7 +3001,7 @@ def question_formatting(self, nb_col = 80, lpotential_switch=0, lnb_key=0, key=None): - """should return four lines: + r"""should return four lines: 1. The upper band (typically /========\ 2. The lower band (typically \========/ 3. The line without conflict | %(nb)2d. %(descrip)-20s %(name)5s = %(switch)-10s | @@ -3239,13 +3249,13 @@ def create_question(self, help_text=True): data_to_format['conflict_switch'] = self.color_for_value(key,self.inconsistent_keys[key], consistency=False) if hidden_line: - f2 = re.sub('%(\((?:name|descrip|add_info)\)-?)(\d+)s', + f2 = re.sub(r'%(\((?:name|descrip|add_info)\)-?)(\d+)s', lambda x: '%%%s%ds' % (x.group(1),int(x.group(2))+9), f2) text.append(f2 % data_to_format) elif hidden_line: if not f3: - f3 = re.sub('%(\((?:name|descrip|add_info)\)-?)(\d+)s', + f3 = re.sub(r'%(\((?:name|descrip|add_info)\)-?)(\d+)s', lambda x: '%%%s%ds' % (x.group(1),int(x.group(2))+9), f1) text.append(f3 % data_to_format) diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/file_writers.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/file_writers.py index 41bff05276..526756129f 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/file_writers.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/file_writers.py @@ -36,10 +36,10 @@ class FileWriter(io.FileIO): supported_preprocessor_commands = ['if'] preprocessor_command_re=re.compile( - "\s*(?P%s)\s*\(\s*(?P.*)\s*\)\s*{\s*"\ + r"\s*(?P%s)\s*\(\s*(?P.*)\s*\)\s*{\s*"\ %('|'.join(supported_preprocessor_commands))) preprocessor_endif_re=re.compile(\ - "\s*}\s*(?Pelse)?\s*(\((?P.*)\))?\s*(?P{)?\s*") + r"\s*}\s*(?Pelse)?\s*(\((?P.*)\))?\s*(?P{)?\s*") class FileWriterError(IOError): """Exception raised if an error occurs in the definition @@ -191,15 +191,15 @@ class FortranWriterError(FileWriter.FileWriterError): pass # Parameters defining the output of the Fortran writer - keyword_pairs = {'^if.+then\s*$': ('^endif', 2), - '^type(?!\s*\()\s*.+\s*$': ('^endtype', 2), - '^do(?!\s+\d+)\s+': ('^enddo\s*$', 2), - '^subroutine': ('^end\s*$', 0), - '^module': ('^end\s*$', 0), - 'function': ('^end\s*$', 0)} - single_indents = {'^else\s*$':-2, - '^else\s*if.+then\s*$':-2} - number_re = re.compile('^(?P\d+)\s+(?P.*)') + keyword_pairs = {r'^if.+then\s*$': ('^endif', 2), + r'^type(?!\s*\()\s*.+\s*$': ('^endtype', 2), + r'^do(?!\s+\d+)\s+': (r'^enddo\s*$', 2), + '^subroutine': (r'^end\s*$', 0), + '^module': (r'^end\s*$', 0), + 'function': (r'^end\s*$', 0)} + single_indents = {r'^else\s*$':-2, + r'^else\s*if.+then\s*$':-2} + number_re = re.compile(r'^(?P\d+)\s+(?P.*)') line_cont_char = '$' comment_char = 'c' uniformcase = True #force everyting to be lower/upper case @@ -212,7 
+212,7 @@ class FortranWriterError(FileWriter.FileWriterError): # Private variables __indent = 0 __keyword_list = [] - __comment_pattern = re.compile(r"^(\s*#|c$|(c\s+([^=]|$))|cf2py|c\-\-|c\*\*|!)", re.IGNORECASE) + __comment_pattern = re.compile(r"^(\s*#|c\$|c$|(c\s+([^=]|$))|cf2py|c\-\-|c\*\*|\s*!|!\$)", re.IGNORECASE) __continuation_line = re.compile(r"(?: )[$&]") def write_line(self, line): @@ -424,26 +424,20 @@ def count_number_of_quotes(self, line): i = i + 1 return len(splitline)-1 - def remove_routine(self, text, fct_names, formatting=True): - """write the incoming text but fully removing the associate routine/function - text can be a path to a file, an iterator, a string - fct_names should be a list of functions to remove + @staticmethod + def get_routine(text, fct_names, call_back=None): + """ + get the fortran function from a fortran file """ - f77_type = ['real*8', 'integer', 'double precision', 'logical'] - pattern = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ + pattern = re.compile(r'^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ % {'type':'|'.join(f77_type)}, re.I) - + + if isinstance(text, str): + text = text.split('\n') + + to_write=False removed = [] - if isinstance(text, str): - if '\n' in text: - text = text.split('\n') - else: - text = open(text) - if isinstance(fct_names, str): - fct_names = [fct_names] - - to_write=True for line in text: fct = pattern.findall(line) if fct: @@ -451,22 +445,38 @@ def remove_routine(self, text, fct_names, formatting=True): to_write = False else: to_write = True - if to_write: - if formatting: - if line.endswith('\n'): - line = line[:-1] - self.writelines(line) - else: - if not line.endswith('\n'): - line = '%s\n' % line - super(FileWriter,self).writelines(line) + if call_back: + call_back(line) else: removed.append(line) - + return removed + + def remove_routine(self, text, fct_names, formatting=True): + """write the incoming text but fully removing the associate routine/function + text can be a path to a file, an iterator, a string + fct_names should be a list of functions to remove + """ + + def call_back(line): + if formatting: + if line.endswith('\n'): + line = line[:-1] + self.writelines(line) + else: + if not line.endswith('\n'): + line = '%s\n' % line + super(FileWriter,self).writelines(line) + + return self.get_routine(text, fct_names, call_back) + +class FortranWriter90(FortranWriter): + + comment_char = ' !' 
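The remove_routine rewrite above separates scanning from writing: get_routine walks the Fortran source once, collects the lines belonging to the named routines, and forwards every other line to an optional call_back (remove_routine passes its own line writer, so its behaviour is unchanged). A self-contained sketch of the same callback pattern, using a simplified routine-header regex instead of the full f77_type-based one:

import re

# simplified stand-in for the SUBROUTINE / typed-FUNCTION header pattern
FCT_RE = re.compile(r'^\s+(?:subroutine|function)\s+([a-zA-Z]\w*)', re.I)

def get_routine(text, fct_names, call_back=None):
    """Collect the lines of the routines listed in fct_names; every other
    line is handed to call_back (if given). Returns the collected lines.
    Note: the MG5 version initialises to_write to False, so any preamble
    before the first routine header is dropped as well."""
    if isinstance(text, str):
        text = text.split('\n')
    to_write = True
    removed = []
    for line in text:
        fct = FCT_RE.findall(line)
        if fct:
            to_write = fct[0].lower() not in [f.lower() for f in fct_names]
        if to_write:
            if call_back:
                call_back(line)
        else:
            removed.append(line)
    return removed

kept = []
src = """      subroutine keepme
      end
      subroutine dropme
      end"""
dropped = get_routine(src, ['dropme'], call_back=kept.append)
print(kept)     # the two lines of keepme
print(dropped)  # the two lines of dropme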
+ #=============================================================================== # CPPWriter #=============================================================================== @@ -497,50 +507,50 @@ class CPPWriterError(FileWriter.FileWriterError): '^private': standard_indent, '^protected': standard_indent} - spacing_patterns = [ - ('\s*\"\s*}', '\"'), - ('\s*,\s*', ', '), - ('\s*-\s*', ' - '), - ('([{(,=])\s*-\s*', '\g<1> -'), - ('(return)\s*-\s*', '\g<1> -'), - ('\s*\+\s*', ' + '), - ('([{(,=])\s*\+\s*', '\g<1> +'), - ('\(\s*', '('), - ('\s*\)', ')'), - ('\{\s*', '{'), - ('\s*\}', '}'), - ('\s*=\s*', ' = '), - ('\s*>\s*', ' > '), - ('\s*<\s*', ' < '), - ('\s*!\s*', ' !'), - ('\s*/\s*', '/'), - ('(?\s+>\s*', ' >> '), - ('<\s*double\s*>>\s*', ' > '), - ('\s*<\s+<\s*', ' << '), - ('\s*-\s+>\s*', '->'), - ('\s*=\s+=\s*', ' == '), - ('\s*!\s+=\s*', ' != '), - ('\s*>\s+=\s*', ' >= '), - ('\s*<\s+=\s*', ' <= '), - ('\s*&&\s*', ' && '), - ('\s*\|\|\s*', ' || '), - ('\s*{\s*}', ' {}'), - ('\s*;\s*', '; '), - (';\s*\}', ';}'), - (';\s*$}', ';'), - ('\s*<\s*([a-zA-Z0-9]+?)\s*>', '<\g<1>>'), - ('^#include\s*<\s*(.*?)\s*>', '#include <\g<1>>'), - ('(\d+\.{0,1}\d*|\.\d+)\s*[eE]\s*([+-]{0,1})\s*(\d+)', - '\g<1>e\g<2>\g<3>'), - ('\s+',' '), - ('^\s*#','#')] + spacing_patterns = [(r'\s*\"\s*}', '\"'), + (r'\s*,\s*', ', '), + (r'\s*-\s*', ' - '), + (r'([{(,=])\s*-\s*', r'\g<1> -'), + (r'(return)\s*-\s*', r'\g<1> -'), + (r'\s*\+\s*', ' + '), + (r'([{(,=])\s*\+\s*', r'\g<1> +'), + (r'\(\s*', '('), + (r'\s*\)', ')'), + (r'\{\s*', '{'), + (r'\s*\}', '}'), + (r'\s*=\s*', ' = '), + (r'\s*>\s*', ' > '), + (r'\s*<\s*', ' < '), + (r'\s*!\s*', ' !'), + (r'\s*/\s*', '/'), + (r'\s*\*\s*', ' * '), + (r'\s*-\s+-\s*', '-- '), + (r'\s*\+\s+\+\s*', '++ '), + (r'\s*-\s+=\s*', ' -= '), + (r'\s*\+\s+=\s*', ' += '), + (r'\s*\*\s+=\s*', ' *= '), + (r'\s*/=\s*', ' /= '), + (r'\s*>\s+>\s*', ' >> '), + (r'<\s*double\s*>>\s*', ' > '), + (r'\s*<\s+<\s*', ' << '), + (r'\s*-\s+>\s*', '->'), + (r'\s*=\s+=\s*', ' == '), + (r'\s*!\s+=\s*', ' != '), + (r'\s*>\s+=\s*', ' >= '), + (r'\s*<\s+=\s*', ' <= '), + (r'\s*&&\s*', ' && '), + (r'\s*\|\|\s*', ' || '), + (r'\s*{\s*}', ' {}'), + (r'\s*;\s*', '; '), + (r';\s*\}', ';}'), + (r';\s*$}', ';'), + (r'\s*<\s*([a-zA-Z0-9]+?)\s*>', r'<\g<1>>'), + (r'^#include\s*<\s*(.*?)\s*>', r'#include <\g<1>>'), + (r'(\d+\.{0,1}\d*|\.\d+)\s*[eE]\s*([+-]{0,1})\s*(\d+)', + r'\g<1>e\g<2>\g<3>'), + (r'\s+',' '), + (r'^\s*#','#')] + spacing_re = dict([(key[0], re.compile(key[0])) for key in \ spacing_patterns]) diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/gen_crossxhtml.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/gen_crossxhtml.py index d58ec573bc..681bf9d09b 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/gen_crossxhtml.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/gen_crossxhtml.py @@ -648,7 +648,7 @@ def recreate(self, banner): if run_card['ickkw'] != 0: #parse the file to have back the information pythia_log = misc.BackRead(pjoin(path, '%s_pythia.log' % tag)) - pythiare = re.compile("\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") + pythiare = re.compile(r"\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") for line in pythia_log: info = pythiare.search(line) if not info: @@ -1623,7 +1623,7 @@ def get_html(self, runresults): elif self.debug: text = str(self.debug).replace('. ','.
<br>') if 'http' in text: - pat = re.compile('(http[\S]*)') + pat = re.compile(r'(http[\S]*)') text = pat.sub(r'<a href="\1"> here </a>', text) debug = '<br><br><font color=red>%s<br>%s</font>
' % \ (self.debug.__class__.__name__, text) diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/gen_ximprove.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/gen_ximprove.py index c86da36a05..415ecc9de0 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/gen_ximprove.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/gen_ximprove.py @@ -158,8 +158,11 @@ def get_helicity(self, to_submit=True, clean=True): (stdout, _) = p.communicate(''.encode()) stdout = stdout.decode('ascii',errors='ignore') if stdout: - nb_channel = max([math.floor(float(d)) for d in stdout.split()]) + lines = stdout.strip().split('\n') + nb_channel = max([math.floor(float(d)) for d in lines[-1].split()]) else: + if os.path.exists(pjoin(self.me_dir, 'error')): + os.remove(pjoin(self.me_dir, 'error')) for matrix_file in misc.glob('matrix*orig.f', Pdir): files.cp(matrix_file, matrix_file.replace('orig','optim')) P_zero_result.append(Pdir) @@ -297,12 +300,14 @@ def get_helicity(self, to_submit=True, clean=True): bad_amps_perhel = [] if __debug__: mtext = open(matrix_file).read() - nb_amp = int(re.findall('PARAMETER \(NGRAPHS=(\d+)\)', mtext)[0]) - logger.debug('nb_hel: %s zero amp: %s bad_amps_hel: %s/%s', len(good_hels),len(bad_amps),len(bad_amps_perhel), len(good_hels)*nb_amp ) + nb_amp = int(re.findall(r'PARAMETER \(NGRAPHS=(\d+)\)', mtext)[0]) + logger.debug('(%s) nb_hel: %s zero amp: %s bad_amps_hel: %s/%s', split_file[-1], len(good_hels),len(bad_amps),len(bad_amps_perhel), len(good_hels)*nb_amp ) if len(good_hels) == 1: files.cp(matrix_file, matrix_file.replace('orig','optim')) continue # avoid optimization if onlye one helicity - recycler = hel_recycle.HelicityRecycler(good_hels, bad_amps, bad_amps_perhel) + + gauge = self.cmd.proc_characteristics['gauge'] + recycler = hel_recycle.HelicityRecycler(good_hels, bad_amps, bad_amps_perhel, gauge=gauge) # In case of bugs you can play around with these: recycler.hel_filt = self.run_card['hel_filtering'] recycler.amp_splt = self.run_card['hel_splitamp'] @@ -1299,7 +1304,7 @@ def get_job_for_event(self): 'script_name': 'unknown', 'directory': C.name, # need to be change for splitted job 'P_dir': C.parent_name, - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # needed for RO gridpack 'offset': 1, # need to be change for splitted job 'nevents': nevents, 'maxiter': self.max_iter, @@ -1387,7 +1392,7 @@ def create_ajob(self, template, jobs, write_dir=None): break info = jobs[j] info['script_name'] = 'ajob%i' % script_number - info['keeplog'] = 'false' + info['keeplog'] = 'false' if self.run_card['keep_log'] != 'debug' else 'true' if "base_directory" not in info: info["base_directory"] = "./" fsock.write(template_text % info) @@ -1456,7 +1461,7 @@ def get_job_for_precision(self): 'script_name': 'unknown', 'directory': C.name, # need to be change for splitted job 'P_dir': C.parent_name, - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # used for RO gridpack 'offset': 1, # need to be change for splitted job 'nevents': nevents, 'maxiter': self.max_iter, @@ -1916,7 +1921,7 @@ def get_job_for_event(self): 'directory': C.name, # need to be change for splitted job 'P_dir': os.path.basename(C.parent_name), 'offset': 1, # need to be change for splitted job - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # use for RO gridpack 'nevents': 
nevents, #int(nevents*self.gen_events_security)+1, 'maxiter': self.max_iter, 'miniter': self.min_iter, diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/hel_recycle.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/hel_recycle.py index dfa45d5d20..6c86611f68 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/hel_recycle.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/hel_recycle.py @@ -383,7 +383,7 @@ def get_number(cls, *args): class HelicityRecycler(): '''Class for recycling helicity''' - def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[]): + def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[], gauge='U'): External.good_hel = [] External.nhel_lines = '' @@ -427,6 +427,7 @@ def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[]): self.all_hel = [] self.hel_filt = True + self.gauge = gauge def set_input(self, file): if 'born_matrix' in file: @@ -612,7 +613,7 @@ def unfold_helicities(self, line, nature): def apply_amps(self, line, new_objs): if self.amp_splt: - return split_amps(line, new_objs) + return split_amps(line, new_objs, gauge=self.gauge) else: return apply_args(line, [i.args for i in new_objs]) @@ -785,7 +786,7 @@ def apply_args(old_line, all_the_args): return ''.join(new_lines) -def split_amps(line, new_amps): +def split_amps(line, new_amps, gauge): if not new_amps: return '' fct = line.split('(',1)[0].split('_0')[0] @@ -841,34 +842,31 @@ def split_amps(line, new_amps): spin = fct.split(None,1)[1][to_remove] lines.append('%sP1N_%s(%s)' % (fct, to_remove+1, ', '.join(args))) - hel, iamp = re.findall('AMP\((\d+),(\d+)\)', amp_result)[0] + hel, iamp = re.findall(r'AMP\((\d+),(\d+)\)', amp_result)[0] hel_calculated.append(hel) #lines.append(' %(result)s = TMP(3) * W(3,%(w)s) + TMP(4) * W(4,%(w)s)+' # % {'result': amp_result, 'w': windex}) #lines.append(' & TMP(5) * W(5,%(w)s)+TMP(6) * W(6,%(w)s)' # % {'result': amp_result, 'w': windex}) - if spin in "VF": - lines.append(""" call CombineAmp(%(nb)i, - & (/%(hel_list)s/), - & (/%(w_list)s/), - & TMP, W, AMP(1,%(iamp)s))""" % - {'nb': len(sub_amps), - 'hel_list': ','.join(hel_calculated), - 'w_list': ','.join(windices), - 'iamp': iamp - }) + if spin == "F" or ( spin == "V" and gauge !='FD'): + suffix = '' elif spin == "S": - lines.append(""" call CombineAmpS(%(nb)i, - &(/%(hel_list)s/), - & (/%(w_list)s/), - & TMP, W, AMP(1,%(iamp)s))""" % - {'nb': len(sub_amps), - 'hel_list': ','.join(hel_calculated), - 'w_list': ','.join(windices), - 'iamp': iamp - }) + suffix = 'S' + elif spin == "V" and gauge == "FD": + suffix = "FD" else: - raise Exception("split amp are not supported for spin2 and 3/2") + raise Exception("split amp not supported for spin2, 3/2") + + lines.append(""" call CombineAmp%(suffix)s(%(nb)i, + & (/%(hel_list)s/), + & (/%(w_list)s/), + & TMP, W, AMP(1,%(iamp)s))""" % {'suffix':suffix, + 'nb': len(sub_amps), + 'hel_list': ','.join(hel_calculated), + 'w_list': ','.join(windices), + 'iamp': iamp + }) + #lines.append('') return '\n'.join(lines) diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/histograms.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/histograms.py index 98f22c4c3a..9931127f66 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/histograms.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/histograms.py @@ -632,34 +632,34 @@ class HwU(Histogram): # than necessary because the HwU standard allows for spaces from within # the name of a weight weight_header_re = re.compile( - '&\s*(?P(\S|(\s(?!\s*(&|$))))+)(\s(?!(&|$)))*') + 
r'&\s*(?P(\S|(\s(?!\s*(&|$))))+)(\s(?!(&|$)))*') # ================================ # Histo weight specification RE's # ================================ # The start of a plot - histo_start_re = re.compile('^\s*\s*(?P\d+)\s*"\s*'+ - '(?P(\S|(\s(?!\s*")))+)\s*"\s*$') + histo_start_re = re.compile(r'^\s*\s*(?P\d+)\s*"\s*'+ + r'(?P(\S|(\s(?!\s*")))+)\s*"\s*$') # A given weight specifier - a_float_re = '[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' - histo_bin_weight_re = re.compile('(?P%s|NaN)'%a_float_re,re.IGNORECASE) - a_int_re = '[\+|-]?\d+' + a_float_re = r'[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' + histo_bin_weight_re = re.compile(r'(?P%s|NaN)'%a_float_re,re.IGNORECASE) + a_int_re = r'[\+|-]?\d+' # The end of a plot histo_end_re = re.compile(r'^\s*<\\histogram>\s*$') # A scale type of weight - weight_label_scale = re.compile('^\s*mur\s*=\s*(?P%s)'%a_float_re+\ - '\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) - weight_label_PDF = re.compile('^\s*PDF\s*=\s*(?P\d+)\s*$') - weight_label_PDF_XML = re.compile('^\s*pdfset\s*=\s*(?P\d+)\s*$') - weight_label_TMS = re.compile('^\s*TMS\s*=\s*(?P%s)\s*$'%a_float_re) - weight_label_alpsfact = re.compile('^\s*alpsfact\s*=\s*(?P%s)\s*$'%a_float_re, + weight_label_scale = re.compile(r'^\s*mur\s*=\s*(?P%s)'%a_float_re+\ + r'\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) + weight_label_PDF = re.compile(r'^\s*PDF\s*=\s*(?P\d+)\s*$') + weight_label_PDF_XML = re.compile(r'^\s*pdfset\s*=\s*(?P\d+)\s*$') + weight_label_TMS = re.compile(r'^\s*TMS\s*=\s*(?P%s)\s*$'%a_float_re) + weight_label_alpsfact = re.compile(r'^\s*alpsfact\s*=\s*(?P%s)\s*$'%a_float_re, re.IGNORECASE) - weight_label_scale_adv = re.compile('^\s*dyn\s*=\s*(?P%s)'%a_int_re+\ - '\s*mur\s*=\s*(?P%s)'%a_float_re+\ - '\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) - weight_label_PDF_adv = re.compile('^\s*PDF\s*=\s*(?P\d+)\s+(?P\S+)\s*$') + weight_label_scale_adv = re.compile(r'^\s*dyn\s*=\s*(?P%s)'%a_int_re+\ + r'\s*mur\s*=\s*(?P%s)'%a_float_re+\ + r'\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) + weight_label_PDF_adv = re.compile(r'^\s*PDF\s*=\s*(?P\d+)\s+(?P\S+)\s*$') class ParseError(MadGraph5Error): @@ -926,7 +926,7 @@ def get_HwU_source(self, print_header=True): res.append(' '.join('%+16.7e'%wgt for wgt in list(bin.boundaries))) res[-1] += ' '.join('%+16.7e'%bin.wgts[key] for key in self.bins.weight_labels if key not in ['central','stat_error']) - res.append('<\histogram>') + res.append(r'<\histogram>') return res def output(self, path=None, format='HwU', print_header=True): @@ -1149,6 +1149,8 @@ def parse_one_histo_from_stream(self, stream, all_weight_header, boundaries = [0.0,0.0] for j, weight in \ enumerate(HwU.histo_bin_weight_re.finditer(line_bin)): + if (j == len(weight_header)): + continue if j == len(all_weight_header): raise HwU.ParseError("There is more bin weights"+\ " specified than expected (%i)"%len(weight_header)) @@ -1803,7 +1805,7 @@ def parse_histos_from_PY8_XML_stream(self, stream, run_id=None, # Filter empty weights coming from the split weight_label_list = [wgt.strip() for wgt in str(selected_run_node.getAttribute('header')).split(';') if - not re.match('^\s*$',wgt)] + not re.match(r'^\s*$',wgt)] ordered_weight_label_list = [w for w in weight_label_list if w not\ in ['xmin','xmax']] # Remove potential repetition of identical weight labels @@ -1827,7 +1829,7 @@ def parse_histos_from_PY8_XML_stream(self, stream, run_id=None, all_weights = [] for wgt_position, wgt_label in \ enumerate(str(selected_run_node.getAttribute('header')).split(';')): - if not 
re.match('^\s*$',wgt_label) is None: + if not re.match(r'^\s*$',wgt_label) is None: continue all_weights.append({'POSITION':wgt_position}) for wgt_item in wgt_label.strip().split('_'): @@ -2714,7 +2716,7 @@ def ratio_no_correlations(wgtsA, wgtsB): # First the global gnuplot header for this histogram group global_header =\ -""" +r""" ################################################################################ ### Rendering of the plot titled '%(title)s' ################################################################################ @@ -2862,9 +2864,9 @@ def ratio_no_correlations(wgtsA, wgtsB): major_title = ', '.join(major_title) if not mu[0] in ['none',None]: - major_title += ', dynamical\_scale\_choice=%s'%mu[0] + major_title += r', dynamical\_scale\_choice=%s'%mu[0] if not pdf[0] in ['none',None]: - major_title += ', PDF=%s'%pdf[0].replace('_','\_') + major_title += ', PDF=%s'%pdf[0].replace('_',r'\_') # Do not show uncertainties for individual jet samples (unless first # or specified explicitely and uniquely) @@ -2937,7 +2939,7 @@ def ratio_no_correlations(wgtsA, wgtsB): plot_lines.append( "'%s' index %d using (($1+$2)/2):%d ls %d title '%s'"\ %(HwU_name,block_position+i,mu_var+3,color_index,\ -'%s dynamical\_scale\_choice=%s' % (title,mu[j]))) +r'%s dynamical\_scale\_choice=%s' % (title,mu[j]))) # And now PDF_variation if available if not PDF_var_pos is None: for j,PDF_var in enumerate(PDF_var_pos): @@ -2947,7 +2949,7 @@ def ratio_no_correlations(wgtsA, wgtsB): plot_lines.append( "'%s' index %d using (($1+$2)/2):%d ls %d title '%s'"\ %(HwU_name,block_position+i,PDF_var+3,color_index,\ -'%s PDF=%s' % (title,pdf[j].replace('_','\_')))) +'%s PDF=%s' % (title,pdf[j].replace('_',r'\_')))) # Now add the uncertainty lines, those not using a band so that they # are not covered by those using a band after we reverse plo_lines diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/launch_plugin.py index 7b10bedcef..9ec09eb71d 100644 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/launch_plugin.py @@ -98,6 +98,7 @@ def default_setup(self): self['vector_size'] = 16 # already setup in default class (just change value) self['aloha_flag'] = '--fast-math' self['matrix_flag'] = '-O3' + self['limhel'] = 0 self.display_block.append('simd') self.display_block.append('psoptim') diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/lhe_parser.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/lhe_parser.py index eee9ba4522..f6e47956cd 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/lhe_parser.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/lhe_parser.py @@ -7,6 +7,7 @@ import numbers import math import time +import copy import os import shutil import sys @@ -101,7 +102,7 @@ def __init__(self, line=None, event=None): self.rwgt = 0 return - + self.event = event if event is not None: self.event_id = len(event) #not yet in the event @@ -1066,6 +1067,8 @@ def define_init_banner(self, wgt, lha_strategy, proc_charac=None): #special case for 1>N init_information = run_card.get_banner_init_information() event = next(self) + if not len(event): #if parse-momenta was false we have to parse the first event + event = Event(str(event)) init_information["idbmup1"] = event[0].pdg init_information["ebmup1"] = event[0].mass init_information["idbmup2"] = 0 @@ -1149,6 +1152,7 @@ def initialize_unweighting(self, getwgt, trunc_error): nb_keep = max(20, int(nb_event*trunc_error*15)) new_wgt = 
new_wgt[-nb_keep:] if nb_event == 0: + misc.sprint(i,f) raise Exception # store the information self.initial_nb_events[i] = nb_event @@ -1203,7 +1207,6 @@ def unweight(self, outputpath, get_wgt, **opts): (stop to write event when target is reached) """ - if isinstance(get_wgt, (str,six.text_type)): unwgt_name =get_wgt def get_wgt_multi(event): @@ -1483,12 +1486,12 @@ def reorder_mother_child(self): particle.mother2 -= 1 # re-call the function for the next potential change return self.reorder_mother_child() - - - - - + + + + + def parse_reweight(self): """Parse the re-weight information in order to return a dictionary {key: value}. If no group is define group should be '' """ @@ -1522,6 +1525,37 @@ def parse_nlo_weight(self, real_type=(1,11), threshold=None): threshold=threshold) return self.nloweight + def get_fks_pair(self, real_type=(1,11), threshold=None): + """ Gives the fks pair labels""" + start, stop = self.tag.find(''), self.tag.find('') + if start != -1 != stop: + text = self.tag[start+8:stop] + all_line = text.split('\n') + text = text.lower().replace('d','e') + all_line = text.split('\n') + for line in all_line: + data = line.split() + if len(data)>16: + wgt = OneNLOWeight(line, real_type=real_type) + return wgt.to_merge_pdg,wgt.nexternal + + def get_born_momenta(self,real_type=(1,11), threshold=None): + """ Gets the underlying n+1 body kinematics""" + start, stop = self.tag.find(''), self.tag.find('') + if start != -1 != stop: + text = self.tag[start+8:stop] + text = text.lower().replace('d','e') + all_line = text.split('\n') + for line in all_line: + data = line.split() + if len(data)>16: + wgt = OneNLOWeight(line, real_type=real_type) + nexternal = wgt.nexternal + real_momenta = all_line[2:2+nexternal] + return real_momenta + + + def rewrite_nlo_weight(self, wgt=None): """get the string associate to the weight""" @@ -1558,11 +1592,11 @@ def parse_lo_weight(self): return self.loweight if not hasattr(Event, 'loweight_pattern'): - Event.loweight_pattern = re.compile('''\s*(?P\d+)\s+(?P[\d.e+-]+)\s*\s*\n\s* - \s*(?P[\s\d.+-e]+)\s*\s*\n\s* - 1|2)["']?\>\s*(?P[\s\d.e+-]*)\s*\s*\n\s* - 1|2)["']?\>\s*(?P[\s\d.e+-]*)\s*\s*\n\s* - \s*(?P[\d.e+-]*)\s* + Event.loweight_pattern = re.compile('''\\s*(?P\\d+)\\s+(?P[\\d.e+-]+)\\s*\\s*\n\\s* + \\s*(?P[\\s\\d.+-e]+)\\s*\\s*\n\\s* + 1|2)["']?\\>\\s*(?P[\\s\\d.e+-]*)\\s*\\s*\n\\s* + 1|2)["']?\\>\\s*(?P[\\s\\d.e+-]*)\\s*\\s*\n\\s* + \\s*(?P[\\d.e+-]*)\\s* ''',re.X+re.I+re.M) start, stop = self.tag.find(''), self.tag.find('') @@ -1615,7 +1649,7 @@ def parse_matching_scale(self): self.matched_scale_data = [] - pattern = re.compile("") + pattern = re.compile(r"") data = re.split(pattern,self.tag) if len(data) == 1: return [] @@ -1623,7 +1657,7 @@ def parse_matching_scale(self): tmp = {} start,content, end = data self.tag = "%s%s" % (start, end) - pattern = re.compile("pt_clust_(\d*)=\"([\de+-.]*)\"") + pattern = re.compile("pt_clust_(\\d*)=\"([\\de+-.]*)\"") for id,value in pattern.findall(content): tmp[int(id)] = float(value) for i in range(1, len(self)+1): @@ -1647,7 +1681,7 @@ def parse_syscalc_info(self): return self.syscalc_data pattern = re.compile("|") - pattern2 = re.compile("<(?P[\w]*)(?:\s*(\w*)=[\"'](.*)[\"']\s*|\s*)>(.*)") + pattern2 = re.compile("<(?P[\\w]*)(?:\\s*(\\w*)=[\"'](.*)[\"']\\s*|\\s*)>(.*)") data = re.split(pattern,self.tag) if len(data) == 1: return [] @@ -1850,6 +1884,240 @@ def get_decay(self, pdg_code=0, event_id=None): return new_event + + def set_initial_mass_to_zero(self): + """set the masses of the initial particles 
to zero, by reshuffling the respective momenta + Works only in the **partonic** com frame, so the event must be boosted to such frame + before calling the function + """ + + if not misc.equal(self[0].px, 0) or not misc.equal(self[1].px, 0) or \ + not misc.equal(self[0].py, 0) or not misc.equal(self[1].py, 0) or \ + not misc.equal(self[0].pz, - self[1].pz, zero_limit=False): + misc.sprint(self[0]) + misc.sprint(self[1]) + raise Exception('momenta should be in the partonic center of mass frame') + + self[0].mass = 0. + self[1].mass = 0. + tot_E=0. + for ip,part in enumerate(self): + if part.status == 1 : + tot_E += part.E + if (self[0].pz > 0. and self[1].pz < 0): + self[0].set_momentum(FourMomentum([tot_E/2., 0., 0., tot_E/2.])) + self[1].set_momentum(FourMomentum([tot_E/2., 0., 0., -tot_E/2.])) + elif (self[0].pz < 0. and self[1].pz > 0): + self[0].set_momentum(FourMomentum([tot_E/2., 0., 0., -tot_E/2.])) + self[1].set_momentum(FourMomentum([tot_E/2., 0., 0., tot_E/2.])) + else: + logger.critical('ERROR: two incoming partons not back.-to-back') + + def set_final_jet_mass_to_zero(self): + """set the final light particle masses to zero + """ + + for ip,part in enumerate(self): + if ((abs(part.pid) <= 5) or (abs(part.pid) == 11) or (abs(part.pid) == 12)) and (part.status == 1): + part.mass = 0. + E_1_new = math.sqrt(part.mass**2 + part.px**2 + part.py**2 + part.pz**2) + part.set_momentum(FourMomentum([E_1_new, part.px, part.py, part.pz])) + + + + def merge_particles_kinematics(self, i,j, moth): + """Map to an underlying n-body kinematics for two given + particles i,j to be merged and a resulting moth""" + """ note! kinematics (and id) mapping only! """ + + recoil = True + fks_type = False + + if recoil and not fks_type: + if (i == moth[0].get('number')-1): + fks_i = i + fks_j = j + elif (j == moth[0].get('number')-1): + fks_i = j + fks_j = i + to_remove = fks_j + + merge_i = self[fks_i] + merge_j = self[fks_j] + + i_4mom = FourMomentum(merge_i) + j_4mom = FourMomentum(merge_j) + if (fks_i <= 1): + sign1 = -1.0 + else: + sign1 = 1.0 + mother_4mom = i_4mom + sign1*j_4mom + + new_event = copy.deepcopy(self) + + self[fks_i].pid = moth[0]['id'] + self[fks_i].set_momentum(mother_4mom) + + if fks_i <= 1: # initial-state recoil + new_p = FourMomentum() + for ip,part in enumerate(self): + if (ip != fks_i and ip != fks_j and ip >= 2): + new_p += part + + if fks_i == 0: + self[1].set_momentum(new_p - FourMomentum(self[0])) + elif fks_i == 1: + self[0].set_momentum(new_p - FourMomentum(self[1])) + + pz_1_new = self.recoil_eq(self[0],self[1]) + pz_2_new = self[0].pz + self[1].pz - pz_1_new + E_1_new = math.sqrt(self[0].mass**2 + self[0].px**2 + self[0].py**2 + pz_1_new **2) + E_2_new = math.sqrt(self[1].mass**2 + self[1].px**2 + self[1].py**2 + pz_2_new **2) + self[0].set_momentum(FourMomentum([E_1_new,self[0].px,self[0].py,pz_1_new])) + self[1].set_momentum(FourMomentum([E_2_new,self[1].px,self[1].py,pz_2_new])) + self.pop(to_remove) + + if fks_i > 1: # final-state recoil + + # Re-scale the energy of fks_i to make it on-shell + for ip,part in enumerate(self): + if (ip == fks_i): + part.E = math.sqrt(part.mass**2 + part.px**2 + part.py**2 + part.pz**2) + new_p.E = part.E + + # Find the overall energy in the final state + new_p.E = 0.0 + for ip,part in enumerate(self): + if (ip != fks_j and ip >= 2): + new_p.E += part.E + + # Use one of the initial states to absorb the energy change in the final state + self[1].set_momentum(FourMomentum([new_p.E-self[0].E,self[1].px,self[1].py,self[1].pz])) + + # Change 
the initial state pz and E + pz_1_new = self.recoil_eq(self[0],self[1]) + pz_2_new = self[0].pz + self[1].pz - pz_1_new + E_1_new = math.sqrt(self[0].mass**2 + self[0].px**2 + self[0].py**2 + pz_1_new **2) + E_2_new = math.sqrt(self[1].mass**2 + self[1].px**2 + self[1].py**2 + pz_2_new **2) + self[0].set_momentum(FourMomentum([E_1_new,self[0].px,self[0].py,pz_1_new])) + self[1].set_momentum(FourMomentum([E_2_new,self[1].px,self[1].py,pz_2_new])) + self.pop(to_remove) + + elif fks_type and not recoil: + ## Do it in a more FKS-style + if (i == moth[0].get('number')-1): + fks_i = i + fks_j = j + elif (j == moth[0].get('number')-1): + fks_i = j + fks_j = i + to_remove = fks_j + new_event = copy.copy(event) + + if fks_i <= 1: # initial-state recoil + + # First boost to partonic CM frame + q = FourMomentum(self[0])+FourMomentum(self[1]) + for ip,part in enumerate(self): + vec = FourMomentum(part) + self[ip].set_momentum(vec.zboost(pboost=q)) + + k_tot = FourMomentum([self[0].E+self[1].E-self[fks_j].E,self[0].px+self[1].px-self[fks_j].px,\ + self[0].py+self[1].py-self[fks_j].py,self[0].pz+self[1].pz-self[fks_j].pz]) + + final = FourMomentum([0,0,0,0]) + for ip,part in enumerate(self): + vec = FourMomentum([part.E,part.px,part.py,part.pz]) + if (ip != fks_i and ip != fks_j and ip >= 2): + final = final + vec + + s = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz])**2 + ksi = self[fks_j].E/(math.sqrt(s)/2.0) + y = self[fks_j].pz/self[fks_j].E + + self[0].pz = self[0].pz * math.sqrt(1.0-ksi)*math.sqrt((2.0-ksi*(1.0+y))/((2.0-ksi*(1.0-y)))) + self[0].E = math.sqrt(self[0].mass**2 + self[0].pz**2) + self[1].pz = self[1].pz * math.sqrt(1.0-ksi)*math.sqrt((2.0-ksi*(1.0-y))/((2.0-ksi*(1.0+y)))) + self[1].E = math.sqrt(self[1].mass**2 + self[1].pz**2) + + final = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz]) + + k_tot_1 = k_tot.zboost(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + k_tot_2 = k_tot_1.pt_boost(pboost=FourMomentum([k_tot_1.E,k_tot_1.px,k_tot_1.py,k_tot_1.pz])) + k_tot_3 = k_tot_2.zboost_inv(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + + for ip,part in enumerate(self): + if (ip >= 2): + vec = FourMomentum([part.E,part.px,part.py,part.pz]) + vec2 = vec.zboost(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + vec3 = vec2.pt_boost(pboost=FourMomentum([k_tot_1.E,k_tot_1.px,k_tot_1.py,k_tot_1.pz])) + vec_new = vec3.zboost_inv(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + self[ip].set_momentum(FourMomentum([vec_new.E,vec_new.px,vec_new.py,vec_new.pz])) + + self.pop(to_remove) + + else: # final-state recoil + q = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz]) + + for ip,part in enumerate(self): + vec = FourMomentum([part.E,part.px,part.py,part.pz]) + self[ip].set_momentum(vec.zboost(pboost=q)) + + q = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz]) + + k = FourMomentum([self[fks_i].E+self[fks_j].E,self[fks_i].px+self[fks_j].px,\ + self[fks_i].py+self[fks_j].py,self[fks_i].pz+self[fks_j].pz]) + + k_rec = FourMomentum([0,0,0,0]) + for ip,part in enumerate(self): + if ip >= 2 and ip != fks_i and ip != fks_j: # add only final-states to the recoil and not the FKS pair + k_rec = k_rec + FourMomentum([part.E,part.px,part.py,part.pz]) + + k_mom = math.sqrt(k_rec.px**2 + k_rec.py**2 + k_rec.pz**2) + beta = (q**2 - 
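# The initial-state FKS branch above rescales the two beam pz components by
# reciprocal factors built from ksi (the radiated-parton energy fraction)
# and y (its cosine w.r.t. the beam axis).  A sketch of those factors as I
# read the code, with a check of the soft limit ksi -> 0:
import math

def fks_beam_rescaling(sqrts, Ej, pzj):
    ksi = Ej / (sqrts / 2.0)
    y = pzj / Ej
    f_plus = math.sqrt(1.0 - ksi) * math.sqrt((2.0 - ksi * (1.0 + y))
                                              / (2.0 - ksi * (1.0 - y)))
    f_minus = math.sqrt(1.0 - ksi) * math.sqrt((2.0 - ksi * (1.0 - y))
                                               / (2.0 - ksi * (1.0 + y)))
    return f_plus, f_minus

f_plus, f_minus = fks_beam_rescaling(1000.0, 1e-9, 1e-10)
assert abs(f_plus - 1.0) < 1e-9 and abs(f_minus - 1.0) < 1e-9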
(k_rec.E+k_mom)**2)/(q**2 + (k_rec.E+k_mom)**2) + for ip,part in enumerate(self): + if ip >= 2 and ip != fks_i and ip != fks_j: + vec = FourMomentum([self[ip].E,self[ip].px,self[ip].py,self[ip].pz]) + self[ip].set_momentum(vec.boost_beta(beta,k_rec)) + if ip == fks_i: + self[ip].set_momentum(q - k_rec.boost_beta(beta,k_rec)) + self.pop(to_remove) + else: + logger.info('Error in Sudakov Born mapping: no recoil scheme found!') + + def recoil_eq(self,part1, part2): + """ In general, solves the equation + E1 + E2 = K + p1 + p2 = c + E1^2 - p1^2 = a + E2^2 - p2^2 = b + and returns p1 + """ + thresh = 1e-6 + import random + a = part1.mass**2 + part1.px**2 + part1.py**2 + b = part2.mass**2 + part2.px**2 + part2.py**2 + c = part1.pz + part2.pz + K = part1.E + part2.E + K2 = K**2 + sol1 = (-a*c + b*c + c**3 - c*K2 - math.sqrt(K2*(a**2 + (b + c**2 - K2)**2 - 2*a*(b - c**2 + K2))))/(2*(c**2-K2)) + sol2 = (-a*c + b*c + c**3 - c*K2 + math.sqrt(K2*(a**2 + (b + c**2 - K2)**2 - 2*a*(b - c**2 + K2))))/(2*(c**2-K2)) + + if abs(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2) - (math.sqrt(a+sol2**2) + math.sqrt(b+(c-sol2)**2))) > thresh: + logger.critical('Error in recoil_eq solver 1') + logger.critical(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2)) + logger.critical(math.sqrt(a+sol2**2) + math.sqrt(b+(c-sol2)**2)) + if abs(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2) - K) > thresh: + logger.critical('Error in recoil_eq solver 2') + logger.critical(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2)) + logger.critical(K) + return sol1 + + def boost(self, filter=None): """modify the current event to boost it according to the current filter""" if filter is None: @@ -1861,7 +2129,7 @@ def boost(self, filter=None): if list(filter(p)): pboost += p else: - pboost = FourMomentum(pboost) + pboost = FourMomentum(filter) # change sign of three-component due to helas convention pboost.px *=-1 @@ -1877,7 +2145,7 @@ def check(self): """check various property of the events""" # check that relative error is under control - threshold = 1e-6 + threshold = 1e-4 #1. Check that the 4-momenta are conserved E, px, py, pz = 0,0,0,0 @@ -1920,7 +2188,50 @@ def check(self): self.check_color_structure() #3. check mass - + + def check_kinematics_only(self): + """check various property of the events - only kinematics""" + + # check that relative error is under control + threshold = 1e-3 + + #1. 
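# recoil_eq above solves E1 + E2 = K, pz1 + pz2 = c with E_i^2 - pz_i^2
# fixed (a and b include the transverse mass squared), returning the
# minus-root of the quadratic obtained by squaring twice.  A self-contained
# numerical check of that closed form (hypothetical input values):
import math

def recoil_pz1(a, b, c, K):
    K2 = K ** 2
    disc = K2 * (a ** 2 + (b + c ** 2 - K2) ** 2 - 2 * a * (b - c ** 2 + K2))
    return (-a * c + b * c + c ** 3 - c * K2 - math.sqrt(disc)) / (2 * (c ** 2 - K2))

a, b = 1.0, 2.0                 # m^2 + pT^2 of each leg
pz1, pz2 = 3.0, -1.5
K = math.sqrt(a + pz1 ** 2) + math.sqrt(b + pz2 ** 2)
sol = recoil_pz1(a, b, pz1 + pz2, K)
assert abs(math.sqrt(a + sol ** 2)
           + math.sqrt(b + (pz1 + pz2 - sol) ** 2) - K) < 1e-9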
Check that the 4-momenta are conserved + E, px, py, pz = 0,0,0,0 + absE, abspx, abspy, abspz = 0,0,0,0 + for particle in self: + coeff = 1 + if particle.status == -1: + coeff = -1 + elif particle.status != 1: + continue + E += coeff * particle.E + absE += abs(particle.E) + px += coeff * particle.px + py += coeff * particle.py + pz += coeff * particle.pz + abspx += abs(particle.px) + abspy += abs(particle.py) + abspz += abs(particle.pz) + # check mass + fourmass = FourMomentum(particle).mass + + if particle.mass and (abs(particle.mass) - fourmass)/ abs(particle.mass) > threshold: + logger.critical(self) + raise Exception( "Do not have correct mass lhe: %s momentum: %s (error at %s" % (particle.mass, fourmass, (abs(particle.mass) - fourmass)/ abs(particle.mass))) + + if abs(E/absE) > threshold: + logger.critical(self) + raise Exception("Do not conserve Energy %s, %s" % (E/absE, E)) + if abs(px/abspx) > threshold: + logger.critical(self) + raise Exception("Do not conserve Px %s, %s" % (px/abspx, px)) + if abs(py/abspy) > threshold: + logger.critical(self) + raise Exception("Do not conserve Py %s, %s" % (py/abspy, py)) + if abs(pz/abspz) > threshold: + logger.critical(self) + raise Exception("Do not conserve Pz %s, %s" % (pz/abspz, pz)) + def assign_scale_line(self, line, convert=True): """read the line corresponding to global event line @@ -2764,7 +3075,7 @@ def zboost(self, pboost=None, E=0, pz=0): if isinstance(pboost, FourMomentum): E = pboost.E pz = pboost.pz - + #beta = pz/E gamma = E / math.sqrt(E**2-pz**2) gammabeta = pz / math.sqrt(E**2-pz**2) @@ -2778,6 +3089,74 @@ def zboost(self, pboost=None, E=0, pz=0): out.pz = 0 return out + def zboost_inv(self, pboost=None, E=0, pz=0): + """Both momenta should be in the same frame. + The boost perform correspond to the boost required to set pboost at + rest (only z boost applied). + """ + if isinstance(pboost, FourMomentum): + E = pboost.E + pz = pboost.pz + + #beta = pz/E + gamma = E / math.sqrt(E**2-pz**2) + gammabeta = pz / math.sqrt(E**2-pz**2) + + out = FourMomentum([gamma*self.E + gammabeta*self.pz, + self.px, + self.py, + gamma*self.pz + gammabeta*self.E]) + + if abs(out.pz) < 1e-6 * out.E: + out.pz = 0 + return out + + + def pt_boost(self, pboost=None, E=0, pz=0): + """Both momenta should be in the same frame. + The boost perform correspond to the boost required to set pboost at + rest (only pT boost applied). 
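# check_kinematics_only above tests each component of the summed momenta
# against the sum of absolute values, with initial-state legs entering with
# a minus sign.  Note that the mass test only fires one-sided, since the
# difference is not wrapped in abs().  A minimal standalone version of the
# conservation part:
def conservation_ok(particles, threshold=1e-3):
    # particles: (status, E, px, py, pz) tuples; status -1 = incoming.
    tot, den = [0.0] * 4, [0.0] * 4
    for status, *p in particles:
        if status not in (-1, 1):
            continue                      # skip intermediate legs
        coeff = -1.0 if status == -1 else 1.0
        for k in range(4):
            tot[k] += coeff * p[k]
            den[k] += abs(p[k])
    return all(abs(t / d) <= threshold for t, d in zip(tot, den) if d)

beams = [(-1, 50., 0., 0., 50.), (-1, 50., 0., 0., -50.)]
finals = [(1, 60., 10., 0., 20.), (1, 40., -10., 0., -20.)]
assert conservation_ok(beams + finals)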
+ """ + + if isinstance(pboost, FourMomentum): + E = pboost.E + px = pboost.px + py = pboost.py + mass = math.sqrt(E**2 - px**2 - py**2) + + betax = px/E + betay = py/E + beta = math.sqrt(betax**2+betay**2) + gamma = 1 / math.sqrt(1.0-beta**2) + + out = FourMomentum([gamma*self.E - gamma*betax*self.px - gamma*betay*self.py, + -gamma*betax*self.E + (1.0 + (gamma-1.0)*betax**2/(beta**2))*self.px + (gamma-1.0)*betax*betay/(beta**2)*self.py, + -gamma*betay*self.E + ((gamma-1.0)*betax*betay/(beta**2))*self.px + (1.0+(gamma-1.0)*(betay**2)/(beta**2))*self.py, + self.pz]) + + if abs(out.px) < 1e-6 * out.E: + out.px = 0 + if abs(out.py) < 1e-6 * out.E: + out.py = 0 + return out + + def boost_beta(self,beta,mom): + """ Boost along the three-momentum of mom with a boost of size beta""" + + unit = mom * (1.0/math.sqrt(mom.px**2+mom.py**2+mom.pz**2)) + beta_vec = beta*unit + bx = beta_vec.px + by = beta_vec.py + bz = beta_vec.pz + gamma = 1.0 / math.sqrt(1.0-beta**2) + + out = FourMomentum([gamma*self.E - gamma*bx*self.px - gamma*by*self.py - gamma*bz*self.pz, + -gamma*bx*self.E + (1.0 + (gamma-1.0)*bx**2/(beta**2))*self.px + (gamma-1.0)*bx*by/(beta**2)*self.py + (gamma-1.0)*bx*bz/(beta**2)*self.pz, + -gamma*by*self.E + ((gamma-1.0)*bx*by/(beta**2))*self.px + (1.0+(gamma-1.0)*(by**2)/(beta**2))*self.py + (gamma-1.0)*by*bz/(beta**2)*self.pz, + -gamma*bz*self.E + (gamma-1.0)*bx*bz/(beta**2)*self.px + (gamma-1.0)*(by*bz)/(beta**2)*self.py + (1.0+(gamma-1.0)*bz**2/(beta**2))*self.pz]) + + return out + def boost_to_restframe(self, pboost): """apply the boost transformation such that pboost is at rest in the new frame. First apply a rotation to allign the pboost to the z axis and then use @@ -2789,27 +3168,64 @@ def boost_to_restframe(self, pboost): return out - # write pboost as (E, p cosT sinF, p sinT sinF, p cosF) - # rotation such that it become (E, 0 , 0 , p ) is - # cosT sinF , -sinT , cosT sinF - # sinT cosF , cosT , sinT sinF - # -sinT , 0 , cosF - p = math.sqrt( pboost.px**2 + pboost.py**2+ pboost.pz**2) - cosF = pboost.pz / p - sinF = math.sqrt(1-cosF**2) - sinT = pboost.py/p/sinF - cosT = pboost.px/p/sinF - - out=FourMomentum([self.E, - self.px*cosT*cosF + self.py*sinT*cosF-self.pz*sinF, - -self.px*sinT+ self.py*cosT, - self.px*cosT*sinF + self.py*sinT*sinF + self.pz*cosF - ]) - out = out.zboost(E=pboost.E,pz=p) + # see here https://physics.stackexchange.com/questions/749036/general-lorentz-boost-of-four-momentum-in-cm-frame-particle-physics + vx = pboost.px/pboost.E + vy = pboost.py/pboost.E + vz = pboost.pz/pboost.E + v = pboost.norm/pboost.E + v2 = pboost.norm_sq/pboost.E**2 + gamma = 1./math.sqrt(1.-v**2) + gammo = gamma-1. 
+ out = FourMomentum(E = gamma*(self.E - vx*self.px - vy*self.py - vz*self.pz), + px= -gamma*vx*self.E + (1+gammo*vx**2/v2)*self.px + gammo*vx*vy/v2*self.py + gammo*vx*vz/v2*self.pz, + py= -gamma*vy*self.E + gammo*vy*vx/v2*self.px + (1+gammo*vy**2/v2)*self.py + gammo*vy*vz/v2*self.pz, + pz= -gamma*vz*self.E + gammo*vz*vx/v2*self.px + gammo*vz*vy/v2*self.py + (1+gammo*vz**2/v2)*self.pz) + return out + def rotate_to_z(self,prot): + + import math + import numpy as np + + z = np.array([0.,0.,1.]) + + px = self.px + py = self.py + pz = self.pz + + refx = prot.px + refy = prot.py + refz = prot.pz + + prot_mom = np.array([px, py, pz]) + ref_mom = np.array([refx, refy, refz]) + + # Create normal vector + n = np.array([refy, -refx, 0.]) + n = n * 1./math.sqrt(self.threedot(n,n)) + t = prot_mom - self.threedot(n,prot_mom)*n + p = ref_mom - self.threedot(ref_mom,z)*z + p = p/math.sqrt(self.threedot(p,p)) + + t_pz = np.array([self.threedot(t,p), self.threedot(t,z), 0.]) + costheta = self.threedot(ref_mom,z)* 1./math.sqrt(self.threedot(ref_mom, ref_mom)) + sintheta=math.sqrt(1.-costheta**2) + + sgn = 1. + t_pz_p = np.array([0., 0., 0.]) + t_pz_p[0] = costheta*t_pz[0] + sgn*(-sintheta) * t_pz[1] + t_pz_p[1] = sgn*sintheta*t_pz[0] + costheta * t_pz[1] + + out_mom = self.threedot(n,prot_mom)*n + t_pz_p[0]*p + t_pz_p[1]*z + + out = FourMomentum([self.E,out_mom[0], out_mom[1], out_mom[2] ] ) + + return out - + def threedot(self,a,b): + + return a[0]*b[0]+a[1]*b[1]+a[2]*b[2] class OneNLOWeight(object): diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/madevent_interface.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/madevent_interface.py index 2a118e21bf..8e30cf690c 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/madevent_interface.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/madevent_interface.py @@ -496,7 +496,6 @@ def help_remove(self): logger.info(" the optional '-f' allows to by-pass all security question") logger.info(" The banner can be remove only if all files are removed first.") - class AskRun(cmd.ControlSwitch): """a class for the question on what to do on a madevent run""" @@ -2393,13 +2392,17 @@ def do_generate_events(self, line): # Check argument's validity mode = self.check_generate_events(args) switch_mode = self.ask_run_configuration(mode, args) - if not args: - # No run name assigned -> assigned one automaticaly - self.set_run_name(self.find_available_run_name(self.me_dir), None, 'parton') - else: - self.set_run_name(args[0], None, 'parton', True) - args.pop(0) - + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + run_card = banner_mod.RunCard(pjoin(self.me_dir, 'Cards', 'run_card.dat'), consistency=False) + if not run_card.scan_set: + if not args: + # No run name assigned -> assigned one automaticaly + self.set_run_name(self.find_available_run_name(self.me_dir), None, 'parton') + else: + self.set_run_name(args[0], None, 'parton', True) + args.pop(0) + + self.run_generate_events(switch_mode, args) self.postprocessing() @@ -2560,7 +2563,7 @@ def wait_monitoring(Idle, Running, Done): self.update_status("postprocessing contur done", level="rivet") # this decorator handle the loop related to scan. 
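# rotate_to_z above splits a momentum into its component along the plane
# normal n = ref x z (left unchanged) and an in-plane part rotated by the
# angle between ref and z.  The same geometry phrased as a Rodrigues
# rotation about n, purely as an illustration of the decomposition, not
# the author's implementation:
import math
import numpy as np

def rotate_about_axis(v, axis, costheta):
    axis = axis / np.linalg.norm(axis)
    sintheta = math.sqrt(1.0 - costheta ** 2)
    return (v * costheta + np.cross(axis, v) * sintheta
            + axis * np.dot(axis, v) * (1.0 - costheta))

v = np.array([1.0, 2.0, 3.0])
r = rotate_about_axis(v, np.array([0.3, -0.4, 0.1]), 0.8)
assert abs(np.linalg.norm(r) - np.linalg.norm(v)) < 1e-12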
- @common_run.scanparamcardhandling() + @common_run.scanparamcardhandling(run_card_scan=True) def run_generate_events(self, switch_mode, args): if self.proc_characteristics['loop_induced'] and self.options['run_mode']==0: @@ -2593,7 +2596,6 @@ def run_generate_events(self, switch_mode, args): # Regular run mode logger.info('Generating %s events with run name %s' % (self.run_card['nevents'], self.run_name)) - self.exec_cmd('survey %s %s' % (self.run_name,' '.join(args)), postcmd=False) nb_event = self.run_card['nevents'] @@ -2975,7 +2977,7 @@ def update_width_in_param_card(decay_info, initial=None, output=None): particle = 0 # Read BRs for this decay line = param_card[line_number] - while re.search('^(#|\s|\d)', line): + while re.search(r'^(#|\s|\d)', line): line = param_card.pop(line_number) if not particle or line.startswith('#'): line=param_card[line_number] @@ -3226,7 +3228,7 @@ def do_treatcards(self, line, mode=None, opt=None): for line in open(pjoin(bias_module_path,'%s.f'%os.path.basename(bias_module_path))): if start and last: break - if not start and not re.search('c\s*parameters\s*=\s*{',line, re.I): + if not start and not re.search(r'c\s*parameters\s*=\s*{',line, re.I): continue start = True if not line.startswith('C'): @@ -3235,7 +3237,7 @@ def do_treatcards(self, line, mode=None, opt=None): if '{' in line: line = line.split('{')[-1] # split for } ! # - split_result = re.split('(\}|!|\#)', line,1, re.M) + split_result = re.split(r'(\}|!|\#)', line,1, re.M) line = split_result[0] sep = split_result[1] if len(split_result)>1 else None if sep == '}': @@ -3514,8 +3516,8 @@ def pass_in_difficult_integration_mode(self, rate=1): text = open(conf_path).read() min_evt, max_evt = 2500 *(2+rate), 10000*(rate+1) - text = re.sub('''\(min_events = \d+\)''', '(min_events = %i )' % min_evt, text) - text = re.sub('''\(max_events = \d+\)''', '(max_events = %i )' % max_evt, text) + text = re.sub(r'''\(min_events = \d+\)''', '(min_events = %i )' % min_evt, text) + text = re.sub(r'''\(max_events = \d+\)''', '(max_events = %i )' % max_evt, text) fsock = open(conf_path, 'w') fsock.write(text) fsock.close() @@ -3619,7 +3621,7 @@ def do_refine(self, line): alljobs = misc.glob('ajob*', Pdir) #remove associated results.dat (ensure to not mix with all data) - Gre = re.compile("\s*j=(G[\d\.\w]+)") + Gre = re.compile(r"\s*j=(G[\d\.\w]+)") for job in alljobs: Gdirs = Gre.findall(open(job).read()) for Gdir in Gdirs: @@ -3727,58 +3729,126 @@ def do_combine_events(self, line): sum_xsec, sum_xerru, sum_axsec = 0,[],0 Gdirs = self.get_Gdir() Gdirs.sort() - for Gdir in Gdirs: - if os.path.exists(pjoin(Gdir, 'events.lhe')): - result = sum_html.OneResult('') - result.read_results(pjoin(Gdir, 'results.dat')) - sum_xsec += result.get('xsec') - sum_xerru.append(result.get('xerru')) - sum_axsec += result.get('axsec') - - if self.run_card['gridpack'] or self.run_card['nevents']==0: - os.remove(pjoin(Gdir, 'events.lhe')) - continue + partials_info = [] + try: + p = subprocess.Popen(["ulimit", "-n"], stdout=subprocess.PIPE) + out, err = p.communicate() + max_G = out.decode() + if max_G == "unlimited": + max_G =2500 + else: + max_G = int(max_G) - 40 + except Exception as error: + logger.debug(error) + max_G = 80 # max(20, len(Gdirs)/self.options['nb_core']) - AllEvent.add(pjoin(Gdir, 'events.lhe'), - result.get('xsec'), - result.get('xerru'), - result.get('axsec') - ) - - if len(AllEvent) >= 80: #perform a partial unweighting - AllEvent.unweight(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % partials), - 
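# The try block in do_combine_events above shells out to `ulimit -n` to
# bound how many event files are opened at once; ulimit is usually a shell
# builtin rather than an executable, so that Popen call will typically
# raise and fall through to the except branch (max_G = 80).  A sketch of
# reading the same limit via the POSIX-only resource module:
import resource

soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE)
max_G = 2500 if soft == resource.RLIM_INFINITY else soft - 40
assert max_G > 0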
get_wgt, log_level=5, trunc_error=1e-2, event_target=self.run_card['nevents']) - AllEvent = lhe_parser.MultiEventFile() - AllEvent.banner = self.banner - AllEvent.add(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % partials), - sum_xsec, - math.sqrt(sum(x**2 for x in sum_xerru)), - sum_axsec) - partials +=1 - if not hasattr(self,'proc_characteristic'): self.proc_characteristic = self.get_characteristics() - if len(AllEvent) == 0: - nb_event = 0 - else: + mycluster = cluster.MultiCore(nb_core=self.options['nb_core']) + + def split(a, n): + """split a list "a" into n chunk of same size (or nearly same size)""" + k, m = divmod(len(a), n) + return (a[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n)) + + partials_info = [] + if len(Gdirs) >= max_G: + start_unweight= time.perf_counter() + # first check in how many chunk we have to split (always use a multiple of nb_core) + nb_split = 1 + nb_G = len(Gdirs) // (2* self.options['nb_core']) + while nb_G > min(80, max_G): + nb_split += 1 + nb_G = len(Gdirs)//(nb_split*2*self.options['nb_core']) + if nb_G < 10: + nb_split -=1 + nb_G = len(Gdirs)//(nb_split*2*self.options['nb_core']) + + #enforce at least 10 directory per thread + if nb_G > 10 or nb_split>1: + # do the unweighting of each chunk on their own thread + nb_chunk = (nb_split*2*self.options['nb_core']) + else: + nb_chunk = len(Gdirs) // 10 + nb_G =10 + + # security that the number of combine events is too large + if nb_chunk >= max_G: + nb_chunk = max_G -1 + nb_G = len(Gdirs) // nb_chunk + + for i, local_G in enumerate(split(Gdirs, nb_chunk)): + line = [pjoin(self.me_dir, "Events", self.run_name, "partials%d.lhe.gz" % i)] + line.append(pjoin(self.me_dir, 'Events', self.run_name, '%s_%s_banner.txt' % (self.run_name, tag))) + line.append(str(self.results.current['cross'])) + line += local_G + partials_info.append(self.do_combine_events_partial(' '.join(line), preprocess_only=True)) + mycluster.submit(sys.executable, + [pjoin(self.me_dir, 'bin', 'internal', 'madevent_interface.py'), 'combine_events_partial'] + line, + stdout='/dev/null' + ) + + starttime = time.time() + update_status = lambda idle, run, finish: \ + self.update_status((idle, run, finish, 'unweight'), level=None, + force=False, starttime=starttime) + mycluster.wait(self.me_dir, update_status) + # do the final combination + for data in partials_info: + AllEvent.add(*data) + + start_unweight= time.perf_counter() nb_event = AllEvent.unweight(pjoin(self.me_dir, "Events", self.run_name, "unweighted_events.lhe.gz"), get_wgt, trunc_error=1e-2, event_target=self.run_card['nevents'], log_level=logging.DEBUG, normalization=self.run_card['event_norm'], proc_charac=self.proc_characteristic) + + #cleaning + for data in partials_info: + path = data[0] + try: + os.remove(path) + except Exception as error: + try: + os.remove(path[:-3]) # try without the .gz + except: + misc.sprint('no file ', path, 'to clean') + else: + for Gdir in Gdirs: + if os.path.exists(pjoin(Gdir, 'events.lhe')): + result = sum_html.OneResult('') + result.read_results(pjoin(Gdir, 'results.dat')) + sum_xsec += result.get('xsec') + sum_xerru.append(result.get('xerru')) + sum_axsec += result.get('axsec') + + if self.run_card['gridpack'] or self.run_card['nevents']==0: + os.remove(pjoin(Gdir, 'events.lhe')) + continue + + AllEvent.add(pjoin(Gdir, 'events.lhe'), + result.get('xsec'), + result.get('xerru'), + result.get('axsec') + ) + + if len(AllEvent) == 0: + nb_event = 0 + else: + nb_event = AllEvent.unweight(pjoin(self.me_dir, "Events", self.run_name, 
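# The split helper defined earlier in this hunk cuts the list of channel
# directories into n nearly equal contiguous chunks; the first len(a) % n
# chunks receive one extra element:
def split(a, n):
    k, m = divmod(len(a), n)
    return (a[i * k + min(i, m):(i + 1) * k + min(i + 1, m)] for i in range(n))

assert [list(c) for c in split(list(range(7)), 3)] == [[0, 1, 2], [3, 4], [5, 6]]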
"unweighted_events.lhe.gz"), + get_wgt, trunc_error=1e-2, event_target=self.run_card['nevents'], + log_level=logging.DEBUG, normalization=self.run_card['event_norm'], + proc_charac=self.proc_characteristic) + + if nb_event < self.run_card['nevents']: + logger.warning("failed to generate enough events. Please follow one of the following suggestions to fix the issue:") + logger.warning(" - set in the run_card.dat 'sde_strategy' to %s", 1 + self.run_card['sde_strategy'] % 2) + logger.warning(" - set in the run_card.dat 'hard_survey' to 1 or 2.") + logger.warning(" - reduce the number of requested events (if set too high)") + logger.warning(" - check that you do not have -integrable- singularity in your amplitude.") + logger.warning(" - regenerate your process directory by selecting another gauge (in particular try FD gauge).") - if nb_event < self.run_card['nevents']: - logger.warning("failed to generate enough events. Please follow one of the following suggestions to fix the issue:") - logger.warning(" - set in the run_card.dat 'sde_strategy' to %s", 1 + self.run_card['sde_strategy'] % 2) - logger.warning(" - set in the run_card.dat 'hard_survey' to 1 or 2.") - logger.warning(" - reduce the number of requested events (if set too high)") - logger.warning(" - check that you do not have -integrable- singularity in your amplitude.") - if partials: - for i in range(partials): - try: - os.remove(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % i)) - except Exception: - os.remove(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe" % i)) self.results.add_detail('nb_event', nb_event) @@ -3789,7 +3859,50 @@ def do_combine_events(self, line): logger.info("combination of events done in %s s ", time.time()-start) self.to_store.append('event') + + ############################################################################ + def do_combine_events_partial(self, line, preprocess_only=False): + """ """ + + AllEvent = lhe_parser.MultiEventFile() + + sum_xsec, sum_xerru, sum_axsec = 0,[],0 + output, banner_path,cross = line.split()[:3] + Gdirs = line.split()[3:] + + cross = float(cross) + if not self.banner: + self.banner = banner_mod.Banner(banner_path) + if not hasattr(self, 'run_card'): + self.run_card = banner_mod.RunCard(self.banner['mgruncard']) + AllEvent.banner = self.banner + + for Gdir in Gdirs: + if os.path.exists(pjoin(Gdir, 'events.lhe')): + result = sum_html.OneResult('') + result.read_results(pjoin(Gdir, 'results.dat')) + sum_xsec += result.get('xsec') + sum_xerru.append(result.get('xerru')) + sum_axsec += result.get('axsec') + + if self.run_card['gridpack'] or self.run_card['nevents']==0: + os.remove(pjoin(Gdir, 'events.lhe')) + continue + if not preprocess_only: + AllEvent.add(pjoin(Gdir, 'events.lhe'), + result.get('xsec'), + result.get('xerru'), + result.get('axsec') + ) + if preprocess_only: + return output, sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), sum_axsec + nb_event = max(min(abs(1.01*self.run_card['nevents']*sum_axsec/cross),self.run_card['nevents']), 10) + get_wgt = lambda event: event.wgt + AllEvent.unweight(output, + get_wgt, log_level=5, trunc_error=1e-2, event_target=nb_event) + return output, sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), sum_axsec + ############################################################################ def correct_bias(self): """check the first event and correct the weight by the bias @@ -3902,13 +4015,19 @@ def do_store_events(self, line): #except Exception: # continue # Store log - try: - if 
os.path.exists(pjoin(G_path, 'log.txt')): - input = pjoin(G_path, 'log.txt') + input = pjoin(G_path, 'log.txt') + if os.path.exists(input): + if self.run_card['keep_log'] not in ["none", "minimal"]: output = pjoin(G_path, '%s_log.txt' % run) - files.mv(input, output) - except Exception: - continue + try: + files.mv(input, output) + except Exception: + continue + elif self.run_card['keep_log'] == "none": + try: + os.remove(input) + except Exception: + continue #try: # # Grid # for name in ['ftn26']: @@ -3989,7 +4108,7 @@ def do_create_gridpack(self, line): misc.call(['./bin/internal/make_gridpack'], cwd=self.me_dir) files.mv(pjoin(self.me_dir, 'gridpack.tar.gz'), pjoin(self.me_dir, '%s_gridpack.tar.gz' % self.run_name)) - os.system("sed -i.bak \"s/\s*.true.*=.*GridRun/ .false. = GridRun/g\" %s/Cards/grid_card.dat" \ + os.system("sed -i.bak \"s/\\s*.true.*=.*GridRun/ .false. = GridRun/g\" %s/Cards/grid_card.dat" \ % self.me_dir) self.update_status('gridpack created', level='gridpack') @@ -4476,7 +4595,7 @@ def do_pythia8(self, line): else: preamble = misc.get_HEPTools_location_setter( pjoin(MG5DIR,'HEPTools'),'lib') - preamble += "\n unset PYTHIA8DATA\n" + #preamble += "\n unset PYTHIA8DATA\n" open(pythia_cmd_card,'w').write("""! ! It is possible to run this card manually with: @@ -4691,7 +4810,7 @@ def do_pythia8(self, line): # Make sure to sure the number of split_events determined during the splitting. split_PY8_Card.systemSet('Main:numberOfEvents',partition_for_PY8[i]) split_PY8_Card.systemSet('HEPMCoutput:scaling',split_PY8_Card['HEPMCoutput:scaling']* - (float(partition_for_PY8[i])/float(n_events))) + (float(partition_for_PY8[i]))) # Add_missing set to False so as to be sure not to add any additional parameter w.r.t # the ones in the original PY8 param_card copied. split_PY8_Card.write(pjoin(parallelization_dir,'PY8Card_%d.dat'%i), @@ -4963,9 +5082,9 @@ def wait_monitoring(Idle, Running, Done): if cross_sections: # Filter the cross_sections specified an keep only the ones # with central parameters and a different merging scale - a_float_re = '[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' + a_float_re = r'[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' central_merging_re = re.compile( - '^\s*Weight_MERGING\s*=\s*(?P%s)\s*$'%a_float_re, + r'^\s*Weight_MERGING\s*=\s*(?P%s)\s*$'%a_float_re, re.IGNORECASE) cross_sections = dict( (float(central_merging_re.match(xsec).group('merging')),value) @@ -5016,8 +5135,8 @@ def wait_monitoring(Idle, Running, Done): def parse_PY8_log_file(self, log_file_path): """ Parse a log file to extract number of event and cross-section. 
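# The do_store_events change above makes per-channel log handling depend on
# the run_card 'keep_log' setting.  As I read it: values other than
# 'none'/'minimal' archive log.txt under <run>_log.txt, 'none' deletes it,
# and 'minimal' leaves it in place.  A sketch of that switch:
import os, shutil

def handle_channel_log(keep_log, g_path, run):
    log = os.path.join(g_path, 'log.txt')
    if not os.path.exists(log):
        return
    if keep_log not in ('none', 'minimal'):
        shutil.move(log, os.path.join(g_path, '%s_log.txt' % run))
    elif keep_log == 'none':
        os.remove(log)
    # 'minimal': keep log.txt where it is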
""" - pythiare = re.compile("Les Houches User Process\(es\)\s*\d+\s*\|\s*(?P\d+)\s*(?P\d+)\s*(?P\d+)\s*\|\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") - pythia_xsec_re = re.compile("Inclusive cross section\s*:\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") + pythiare = re.compile(r"Les Houches User Process\(es\)\s*\d+\s*\|\s*(?P\d+)\s*(?P\d+)\s*(?P\d+)\s*\|\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") + pythia_xsec_re = re.compile(r"Inclusive cross section\s*:\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") sigma_m, Nacc, Ntry = None, None, None for line in misc.BackRead(log_file_path): info = pythiare.search(line) @@ -5158,7 +5277,7 @@ def do_pythia(self, line): # read the line from the bottom of the file #pythia_log = misc.BackRead(pjoin(self.me_dir,'Events', self.run_name, # '%s_pythia.log' % tag)) - pythiare = re.compile("\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") + pythiare = re.compile(r"\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") for line in misc.reverse_readline(pjoin(self.me_dir,'Events', self.run_name, '%s_pythia.log' % tag)): info = pythiare.search(line) @@ -5619,8 +5738,8 @@ def launch_job(self,exe, cwd=None, stdout=None, argument = [], remaining=0, input_files.append(self.get_pdf_input_filename()) #Find the correct ajob - Gre = re.compile("\s*j=(G[\d\.\w]+)") - origre = re.compile("grid_directory=(G[\d\.\w]+)") + Gre = re.compile(r"\s*j=(G[\d\.\w]+)") + origre = re.compile(r"grid_directory=(G[\d\.\w]+)") try : fsock = open(exe) except Exception: @@ -5628,7 +5747,7 @@ def launch_job(self,exe, cwd=None, stdout=None, argument = [], remaining=0, text = fsock.read() output_files = Gre.findall(text) if not output_files: - Ire = re.compile("for i in ([\d\.\s]*) ; do") + Ire = re.compile(r"for i in ([\d\.\s]*) ; do") data = Ire.findall(text) data = ' '.join(data).split() for nb in data: @@ -6035,7 +6154,7 @@ def get_last_tag(self, level): 'syscalc':[], 'rivet':['rivet']} - if name == self.run_name: + if name and name == self.run_name: if reload_card: run_card = pjoin(self.me_dir, 'Cards','run_card.dat') self.run_card = banner_mod.RunCard(run_card) @@ -6334,7 +6453,7 @@ def run_syscalc(self, mode='parton', event_path=None, output=None): elif mode == 'Pythia': stdout = open(pjoin(event_dir, self.run_name, '%s_%s_syscalc.log' % (tag,mode)),'w') if 'mgpythiacard' in self.banner: - pat = re.compile('''^\s*qcut\s*=\s*([\+\-\d.e]*)''', re.M+re.I) + pat = re.compile(r'''^\s*qcut\s*=\s*([\+\-\d.e]*)''', re.M+re.I) data = pat.search(self.banner['mgpythiacard']) if data: qcut = float(data.group(1)) @@ -6611,7 +6730,7 @@ def get_subP_ids(path): for line in open(pjoin(path, 'leshouche.inc')): if not 'IDUP' in line: continue - particles = re.search("/([\d,-]+)/", line) + particles = re.search(r"/([\d,-]+)/", line) all_ids.append([int(p) for p in particles.group(1).split(',')]) return all_ids @@ -6899,6 +7018,7 @@ def do_combine_events(self, line): partials = 0 # if too many file make some partial unweighting sum_xsec, sum_xerru, sum_axsec = 0,[],0 + partials_info = [] Gdirs = self.get_Gdir() Gdirs.sort() for Gdir in Gdirs: @@ -6917,16 +7037,21 @@ def do_combine_events(self, line): sum_axsec += result.get('axsec')*gscalefact[Gdir] if len(AllEvent) >= 80: #perform a partial unweighting + nb_event = min(abs(1.01*self.nb_event*sum_axsec/self.results.current['cross']),self.run_card['nevents']) AllEvent.unweight(pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), - get_wgt, log_level=5, trunc_error=1e-2, event_target=self.nb_event) + 
get_wgt, log_level=5, trunc_error=1e-2, event_target=nb_event) AllEvent = lhe_parser.MultiEventFile() AllEvent.banner = self.banner - AllEvent.add(pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), + partials_info.append((pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), - sum_axsec) + sum_axsec) ) + sum_xsec, sum_xerru, sum_axsec = 0,[],0 partials +=1 + for data in partials_info: + AllEvent.add(*data) + if not hasattr(self,'proc_characteristic'): self.proc_characteristic = self.get_characteristics() diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/misc.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/misc.py index c4c669f36b..e7fd60be0d 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/misc.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/misc.py @@ -67,7 +67,7 @@ def parse_info_str(fsock): """ info_dict = {} - pattern = re.compile("(?P\w*)\s*=\s*(?P.*)", + pattern = re.compile(r"(?P\w*)\s*=\s*(?P.*)", re.IGNORECASE | re.VERBOSE) for entry in fsock: entry = entry.strip() @@ -84,7 +84,7 @@ def parse_info_str(fsock): def glob(name, path=''): """call to glob.glob with automatic security on path""" import glob as glob_module - path = re.sub('(?P\?|\*|\[|\])', '[\g]', path) + path = re.sub(r'(?P\?|\*|\[|\])', r'[\g]', path) return glob_module.glob(pjoin(path, name)) #=============================================================================== @@ -614,10 +614,10 @@ def mod_compilator(directory, new='gfortran', current=None, compiler_type='gfort #search file file_to_change=find_makefile_in_dir(directory) if compiler_type == 'gfortran': - comp_re = re.compile('^(\s*)FC\s*=\s*(.+)\s*$') + comp_re = re.compile(r'^(\s*)FC\s*=\s*(.+)\s*$') var = 'FC' elif compiler_type == 'cpp': - comp_re = re.compile('^(\s*)CXX\s*=\s*(.+)\s*$') + comp_re = re.compile(r'^(\s*)CXX\s*=\s*(.+)\s*$') var = 'CXX' else: MadGraph5Error, 'Unknown compiler type: %s' % compiler_type @@ -861,9 +861,9 @@ def detect_current_compiler(path, compiler_type='fortran'): # comp = re.compile("^\s*FC\s*=\s*(\w+)\s*") # The regular expression below allows for compiler definition with absolute path if compiler_type == 'fortran': - comp = re.compile("^\s*FC\s*=\s*([\w\/\\.\-]+)\s*") + comp = re.compile("^\\s*FC\\s*=\\s*([\\w\\/\\.\\-]+)\\s*") elif compiler_type == 'cpp': - comp = re.compile("^\s*CXX\s*=\s*([\w\/\\.\-]+)\s*") + comp = re.compile("^\\s*CXX\\s*=\\s*([\\w\\/\\.\\-]+)\\s*") else: MadGraph5Error, 'Unknown compiler type: %s' % compiler_type @@ -1001,7 +1001,19 @@ def call_stdout(arg, *args, **opt): def copytree(src, dst, symlinks = False, ignore = None): if not os.path.exists(dst): os.makedirs(dst) - shutil.copystat(src, dst) + try: + shutil.copystat(src, dst) + except PermissionError: + if os.path.realpath(src).startswith('/cvmfs') and os.path.realpath(dst).startswith('/afs'): + # allowing missmatch from cvmfs to afs since sounds to not create issue --at least in general-- + logger.critical(f'Ignoring that we could not copy permissions from {src} to {dst}') + else: + logger.critical(f'Permission error detected from {src} to {dst}.\n'+\ + 'If you are using WSL with windows partition, please try using python3.12\n'+\ + 'or avoid moving your data from the WSL partition to the UNIX one') + # we do not have enough experience in WSL to allow it to get trough. + raise + lst = os.listdir(src) if ignore: excl = ignore(src, lst) @@ -1895,12 +1907,12 @@ class EasterEgg(object): May4_banner = "* _____ *\n" + \ "* ,-~\" \"~-. *\n" + \ "* * ,^ ___ ^. 
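# The hunk above stops re-adding the growing partials file to the event
# collection and instead records one (path, xsec, error, axsec) tuple per
# partial, resetting the accumulators each time.  When the records are
# merged, cross sections add linearly and errors in quadrature; this is my
# reading of that arithmetic, not the MultiEventFile API:
import math

def combine_partials(partials):
    xsec = sum(p[1] for p in partials)
    err = math.sqrt(sum(p[2] ** 2 for p in partials))
    axsec = sum(p[3] for p in partials)
    return xsec, err, axsec

assert combine_partials([('p0', 1.0, 3.0, 1.0),
                         ('p1', 2.0, 4.0, 2.0)]) == (3.0, 5.0, 3.0)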
* *\n" + \ - "* * / .^ ^. \ * *\n" + \ + "* * / .^ ^. \\ * *\n" + \ "* * Y l o ! Y * *\n" + \ "* * l_ `.___.' _,[ * *\n" + \ "* * |^~\"--------------~\"\"^| * *\n" + \ "* * ! May the 4th ! * *\n" + \ - "* * \ / * *\n" + \ + "* * \\ / * *\n" + \ "* * ^. .^ * *\n" + \ "* * \"-.._____.,-\" * *\n" @@ -1909,13 +1921,13 @@ class EasterEgg(object): "* M::::::::::M M::::::::::M *\n" + \ "* M:::::::::::M M:::::::::::M (_)___ *\n" + \ "* M:::::::M::::M M::::M:::::::M | / __| *\n" + \ - "* M::::::M M::::M M::::M M::::::M | \__ \ *\n" + \ + "* M::::::M M::::M M::::M M::::::M | \\__ \\ *\n" + \ "* M::::::M M::::M::::M M::::::M |_|___/ *\n" + \ "* M::::::M M:::::::M M::::::M *\n" + \ "* M::::::M M:::::M M::::::M / _| ___ _ __ *\n" + \ - "* M::::::M MMMMM M::::::M | |_ / _ \| '__| *\n" + \ + "* M::::::M MMMMM M::::::M | |_ / _ \\| '__| *\n" + \ "* M::::::M M::::::M | _| (_) | | *\n" + \ - "* M::::::M M::::::M |_/\/\___/|_| *\n" + \ + "* M::::::M M::::::M |_/\\/\\___/|_| *\n" + \ "* M::::::M M::::::M *\n" + \ "* MMMMMMMM MMMMMMMM *\n" + \ "* *\n" + \ @@ -2233,39 +2245,51 @@ def import_python_lhapdf(lhapdfconfig): os.environ['LD_LIBRARY_PATH'] = lhapdf_libdir else: os.environ['LD_LIBRARY_PATH'] = '%s:%s' %(lhapdf_libdir,os.environ['LD_LIBRARY_PATH']) - try: candidates=[dirname for dirname in os.listdir(lhapdf_libdir) \ if os.path.isdir(os.path.join(lhapdf_libdir,dirname))] except OSError: candidates=[] + if os.path.isdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')): + candidates += [pjoin(os.pardir,'local', 'lib', dirname) for dirname in os.listdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')) + if os.path.isdir(os.path.join(lhapdf_libdir,os.pardir, 'local', 'lib', dirname))] for candidate in candidates: - if os.path.isdir(os.path.join(lhapdf_libdir,candidate,'site-packages')): - sys.path.insert(0,os.path.join(lhapdf_libdir,candidate,'site-packages')) - try: - import lhapdf - use_lhapdf=True - break - except ImportError: - sys.path.pop(0) - continue + for subdir in ['site-packages', 'dist-packages']: + if os.path.isdir(os.path.join(lhapdf_libdir,candidate, subdir)): + sys.path.insert(0,os.path.join(lhapdf_libdir,candidate, subdir)) + try: + import lhapdf + use_lhapdf=True + break + except ImportError as error: + sys.path.pop(0) + continue + else: + continue + break if not use_lhapdf: try: candidates=[dirname for dirname in os.listdir(lhapdf_libdir+'64') \ if os.path.isdir(os.path.join(lhapdf_libdir+'64',dirname))] except OSError: candidates=[] - + if os.path.isdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib64')): + candidates += [pjoin(os.pardir,'local', 'lib64', dirname) for dirname in os.listdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')) + if os.path.isdir(os.path.join(lhapdf_libdir,os.pardir, 'local', 'lib64', dirname))] for candidate in candidates: - if os.path.isdir(os.path.join(lhapdf_libdir+'64',candidate,'site-packages')): - sys.path.insert(0,os.path.join(lhapdf_libdir+'64',candidate,'site-packages')) - try: - import lhapdf - use_lhapdf=True - break - except ImportError: - sys.path.pop(0) - continue + for subdir in ['site-packages', 'dist-packages']: + if os.path.isdir(os.path.join(lhapdf_libdir+'64',candidate, subdir)): + sys.path.insert(0,os.path.join(lhapdf_libdir+'64',candidate, subdir)) + try: + import lhapdf + use_lhapdf=True + break + except ImportError as error: + sys.path.pop(0) + continue + else: + continue + break if not use_lhapdf: try: import lhapdf diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/shower_card.py 
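# The nested search in import_python_lhapdf above relies on Python's
# for/else to break out of two loops at once: a successful inner `break`
# skips the inner else clause and reaches the outer `break`, while an
# exhausted inner loop hits `continue` and moves on to the next candidate.
# The pattern in isolation (hypothetical search space):
found = None
for candidate in ['a', 'b', 'c']:
    for subdir in ['site-packages', 'dist-packages']:
        if candidate == 'b' and subdir == 'dist-packages':
            found = (candidate, subdir)
            break
    else:
        continue
    break

assert found == ('b', 'dist-packages')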
b/epochX/cudacpp/gg_ttggg.mad/bin/internal/shower_card.py index e87d534177..16ed72b1a0 100755 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/shower_card.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/shower_card.py @@ -288,7 +288,7 @@ def write(self, output_file, template=None, python_template=False, self.text = open(template,'r').read() - key_re = re.compile('^(\s*)([\S^#]+)(\s*)=(\s*)([^#]*?)(\s*)(\#.*|$)') + key_re = re.compile(r'^(\s*)([\S^#]+)(\s*)=(\s*)([^#]*?)(\s*)(\#.*|$)') newlines = [] for line in self.text.split('\n'): key_match = key_re.findall(line) diff --git a/epochX/cudacpp/gg_ttggg.mad/bin/internal/systematics.py b/epochX/cudacpp/gg_ttggg.mad/bin/internal/systematics.py index 28eaed00e2..2acebd087c 100644 --- a/epochX/cudacpp/gg_ttggg.mad/bin/internal/systematics.py +++ b/epochX/cudacpp/gg_ttggg.mad/bin/internal/systematics.py @@ -582,7 +582,7 @@ def print_cross_sections(self, all_cross, nb_event, stdout): else: resume.write( '#PDF %s: %g +%2.3g%% -%2.3g%%\n' % (pdfset.name, pdferr.central,pdferr.errplus*100/all_cross[0], pdferr.errminus*100/all_cross[0])) - dyn_name = {1: '\sum ET', 2:'\sum\sqrt{m^2+pt^2}', 3:'0.5 \sum\sqrt{m^2+pt^2}',4:'\sqrt{\hat s}' } + dyn_name = {1: r'\sum ET', 2:r'\sum\sqrt{m^2+pt^2}', 3:r'0.5 \sum\sqrt{m^2+pt^2}',4:r'\sqrt{\hat s}' } for key, curr in dyns.items(): if key ==-1: continue @@ -789,7 +789,7 @@ def get_id(self): return int(self.start_wgt_id) if 'initrwgt' in self.banner: - pattern = re.compile(' set lhapdf /PATH/TO/lhapdf-config -None does not seem to correspond to a valid lhapdf-config executable. -Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). -Note that you can still compile and run aMC@NLO with the built-in PDFs - MG5_aMC> set lhapdf /PATH/TO/lhapdf-config - Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -62,7 +57,7 @@ generate g g > t t~ g g g No model currently active, so we import the Standard Model INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005467414855957031  +DEBUG: model prefixing takes 0.005609273910522461  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -155,17 +150,16 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=5: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Process has 1240 diagrams -1 processes with 1240 diagrams generated in 1.898 s +1 processes with 1240 diagrams generated in 1.925 s Total: 1 processes with 1240 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gg_ttggg Load PLUGIN.CUDACPP_OUTPUT -Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.5.3_lo_vect. +Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT -DEBUG: cformat =  plugin [export_cpp.py at line 3070]  DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 165]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g g WEIGHTED<=5 @1 INFO: Processing color information for process: g g > t t~ g g g @1 @@ -174,18 +168,18 @@ INFO: Processing color information for process: g g > t t~ g g g @1 DEBUG: type(fortran_model)= [output.py at line 214]  DEBUG: type(me)= me=0 [output.py at line 215]  DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 216]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. -Generated helas calls for 1 subprocesses (1240 diagrams) in 6.697 s +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/SubProcesses/P1_Sigma_sm_gg_ttxggg/. 
+Generated helas calls for 1 subprocesses (1240 diagrams) in 6.699 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.350 s +ALOHA: aloha creates 5 routines in 0.354 s VVV1 VVV1 FFV1 @@ -198,17 +192,17 @@ ALOHA: aloha creates 5 routines in 0.350 s VVVV3 VVVV4 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gg_ttggg/src/. quit -real 0m13.364s -user 0m13.053s -sys 0m0.114s -Code generation completed in 14 seconds +real 0m13.246s +user 0m13.071s +sys 0m0.121s +Code generation completed in 13 seconds diff --git a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt index 0173a6def8..ac9431cf42 100644 --- a/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt +++ b/epochX/cudacpp/gq_ttq.mad/CODEGEN_mad_gq_ttq_log.txt @@ -14,7 +14,7 @@ Running MG5 in debug mode * * * * * * * * * * * * -* VERSION 3.5.3_lo_vect 2023-12-23 * +* VERSION 3.6.0_lo_vect 2024-06-17 * * * * WARNING: UNKNOWN DEVELOPMENT VERSION. * * WARNING: DO NOT USE FOR PRODUCTION * @@ -45,15 +45,10 @@ Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (inclu Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config -None does not seem to correspond to a valid lhapdf-config executable. -Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). -Note that you can still compile and run aMC@NLO with the built-in PDFs - MG5_aMC> set lhapdf /PATH/TO/lhapdf-config - Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -61,7 +56,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005719423294067383  +DEBUG: model prefixing takes 0.0056684017181396484  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -170,24 +165,24 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.079 s +8 processes with 40 diagrams generated in 0.080 s Total: 8 processes with 40 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_gq_ttq --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT -Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.5.3_lo_vect. +Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT +DEBUG: opt['output_options']['vector_size'] =  32 [export_v4.py at line 4334]  Output will be done with PLUGIN: CUDACPP_OUTPUT -DEBUG: cformat =  standalone_simd [export_cpp.py at line 3070]  DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 165]  INFO: initialize a new directory: CODEGEN_mad_gq_ttq INFO: remove old information in CODEGEN_mad_gq_ttq DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g u > t t~ u WEIGHTED<=3 @1 INFO: Processing color information for process: g u > t t~ u @1 @@ -205,13 +200,8 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  -DEBUG: subproc_number =  0 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gu_ttxu [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1548]  @@ -221,70 +211,71 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  -DEBUG: subproc_number =  1 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gux_ttxux [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1548]  -Generated helas calls for 2 subprocesses (10 diagrams) in 0.032 s -Wrote files for 32 helas calls in 0.184 s +Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s +Wrote files for 32 helas calls in 0.167 s +DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.148 s +ALOHA: aloha creates 2 routines in 0.147 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 4 routines in 0.134 s +ALOHA: aloha creates 4 routines in 0.135 s FFV1 FFV1 FFV1 FFV1 VVV1 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common -patching file Source/genps.inc +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file SubProcesses/makefile -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -Hunk #1 succeeded at 75 (offset 3 lines). -Hunk #2 succeeded at 259 (offset 26 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #1 succeeded at 74 (offset 3 lines). +Hunk #2 succeeded at 254 (offset 40 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -Hunk #1 succeeded at 75 (offset 3 lines). 
-Hunk #2 succeeded at 259 (offset 26 lines). +Hunk #1 succeeded at 74 (offset 3 lines). +Hunk #2 succeeded at 254 (offset 40 lines). DEBUG: p.returncode =  0 [output.py at line 258]  -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/README +/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/README Run "open index.html" to see more information about this process. quit -real 0m2.246s -user 0m1.907s -sys 0m0.311s -Code generation completed in 3 seconds +real 0m2.217s +user 0m1.918s +sys 0m0.291s +Code generation completed in 2 seconds ************************************************************ * * * W E L C O M E to * @@ -297,7 +288,7 @@ Code generation completed in 3 seconds * * * * * * * * * * * * -* VERSION 3.5.3_lo_vect * +* VERSION 3.6.0_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * @@ -305,9 +296,9 @@ Code generation completed in 3 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt @@ -327,7 +318,7 @@ launch in debug mode * * * * * * * * * * * * -* VERSION 3.5.3_lo_vect * +* VERSION 3.6.0_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * @@ -335,9 +326,9 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_gq_ttq/Cards/me5_configuration.txt Using default text editor "vi". 
Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt diff --git a/epochX/cudacpp/gq_ttq.mad/Cards/me5_configuration.txt b/epochX/cudacpp/gq_ttq.mad/Cards/me5_configuration.txt index cdeedc7863..4f5079f78a 100644 --- a/epochX/cudacpp/gq_ttq.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/gq_ttq.mad/Cards/me5_configuration.txt @@ -116,6 +116,7 @@ # cluster_type = condor # cluster_queue = madgraph # cluster_size = 150 +# cluster_walltime = # time in minute for slurm and second for condor (not supported for other scheduller) #! Path to a node directory to avoid direct writing on the central disk #! Note that condor clusters avoid direct writing by default (therefore this @@ -234,7 +235,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/gq_ttq.mad/Cards/param_card.dat b/epochX/cudacpp/gq_ttq.mad/Cards/param_card.dat index caf4a67ea8..bb150ef10d 100644 --- a/epochX/cudacpp/gq_ttq.mad/Cards/param_card.dat +++ b/epochX/cudacpp/gq_ttq.mad/Cards/param_card.dat @@ -1,5 +1,5 @@ ###################################################################### -## PARAM_CARD AUTOMATICALY GENERATED BY MG5 FOLLOWING UFO MODEL #### +## PARAM_CARD AUTOMATICALLY GENERATED BY MG5 FOLLOWING UFO MODEL #### ###################################################################### ## ## ## Width set on Auto will be computed following the information ## diff --git a/epochX/cudacpp/gq_ttq.mad/Cards/param_card_default.dat b/epochX/cudacpp/gq_ttq.mad/Cards/param_card_default.dat index caf4a67ea8..bb150ef10d 100644 --- a/epochX/cudacpp/gq_ttq.mad/Cards/param_card_default.dat +++ b/epochX/cudacpp/gq_ttq.mad/Cards/param_card_default.dat @@ -1,5 +1,5 @@ ###################################################################### -## PARAM_CARD AUTOMATICALY GENERATED BY MG5 FOLLOWING UFO MODEL #### +## PARAM_CARD AUTOMATICALLY GENERATED BY MG5 FOLLOWING UFO MODEL #### ###################################################################### ## ## ## Width set on Auto will be computed following the information ## diff --git a/epochX/cudacpp/gq_ttq.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/gq_ttq.mad/Cards/proc_card_mg5.dat index deab56cf41..6213a9324b 100644 --- a/epochX/cudacpp/gq_ttq.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/gq_ttq.mad/Cards/proc_card_mg5.dat @@ -8,7 +8,7 @@ #* * * * #* * #* * -#* VERSION 3.5.3_lo_vect 2023-12-23 * +#* VERSION 3.6.0_lo_vect 2024-06-17 * #* * #* WARNING: UNKNOWN DEVELOPMENT VERSION. * #* WARNING: DO NOT USE FOR PRODUCTION * diff --git a/epochX/cudacpp/gq_ttq.mad/Cards/reweight_card_default.dat b/epochX/cudacpp/gq_ttq.mad/Cards/reweight_card_default.dat index ace534ae02..5cc65fe095 100644 --- a/epochX/cudacpp/gq_ttq.mad/Cards/reweight_card_default.dat +++ b/epochX/cudacpp/gq_ttq.mad/Cards/reweight_card_default.dat @@ -19,8 +19,16 @@ change mode NLO # Define type of Reweighting. For LO sample this command # has no effect since only "LO" mode is allowed. 
+ +#uncomment if you do not want to overwrite the reweight file of Sudakov in rw_me +#change rwgt_dir /PATH_MG5_BRANCH/NAME_FOLDER + +#uncomment if you want to use Sudakov Reweight +#change include_sudakov True + launch + # SPECIFY A PATH OR USE THE SET COMMAND LIKE THIS: # set sminputs 1 130 # modify 1/alpha_EW diff --git a/epochX/cudacpp/gq_ttq.mad/Cards/run_card.dat b/epochX/cudacpp/gq_ttq.mad/Cards/run_card.dat index b016597d23..66a805e521 100644 --- a/epochX/cudacpp/gq_ttq.mad/Cards/run_card.dat +++ b/epochX/cudacpp/gq_ttq.mad/Cards/run_card.dat @@ -162,6 +162,7 @@ systematics = systematics_program ! none, systematics [python], SysCalc [depreceted, C++] ['--mur=0.5,1,2', '--muf=0.5,1,2', '--pdf=errorset'] = systematics_arguments ! see: https://cp3.irmp.ucl.ac.be/projects/madgraph/wiki/Systematics#Systematicspythonmodule + #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ diff --git a/epochX/cudacpp/gq_ttq.mad/Cards/run_card_default.dat b/epochX/cudacpp/gq_ttq.mad/Cards/run_card_default.dat index e0dabc3b73..8c0f1e2199 100644 --- a/epochX/cudacpp/gq_ttq.mad/Cards/run_card_default.dat +++ b/epochX/cudacpp/gq_ttq.mad/Cards/run_card_default.dat @@ -162,6 +162,7 @@ systematics = systematics_program ! none, systematics [python], SysCalc [depreceted, C++] ['--mur=0.5,1,2', '--muf=0.5,1,2', '--pdf=errorset'] = systematics_arguments ! see: https://cp3.irmp.ucl.ac.be/projects/madgraph/wiki/Systematics#Systematicspythonmodule + #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ diff --git a/epochX/cudacpp/gq_ttq.mad/MGMEVersion.txt b/epochX/cudacpp/gq_ttq.mad/MGMEVersion.txt index 9d3a5c0ba0..a806d3938c 100644 --- a/epochX/cudacpp/gq_ttq.mad/MGMEVersion.txt +++ b/epochX/cudacpp/gq_ttq.mad/MGMEVersion.txt @@ -1 +1 @@ -3.5.3_lo_vect \ No newline at end of file +3.6.0_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/gq_ttq.mad/Source/DHELAS/aloha_functions.f b/epochX/cudacpp/gq_ttq.mad/Source/DHELAS/aloha_functions.f index 975725737f..47699fa614 100644 --- a/epochX/cudacpp/gq_ttq.mad/Source/DHELAS/aloha_functions.f +++ b/epochX/cudacpp/gq_ttq.mad/Source/DHELAS/aloha_functions.f @@ -351,7 +351,7 @@ subroutine oxxxxx(p,fmass,nhel,nsf , fo) fo(6) = ip * sqm(abs(im)) else - pp = min(p(0),dsqrt(p(1)**2+p(2)**2+p(3)**2)) +c pp = min(p(0),dsqrt(p(1)**2+p(2)**2+p(3)**2)) sf(1) = dble(1+nsf+(1-nsf)*nh)*rHalf sf(2) = dble(1+nsf-(1-nsf)*nh)*rHalf omega(1) = dsqrt(p(0)+pp) @@ -2054,7 +2054,7 @@ subroutine CombineAmp(nb, ihels, iwfcts, W1, Wall, Amp) enddo return end - + subroutine CombineAmpS(nb, ihels, iwfcts, W1, Wall, Amp) integer nb ! size of the vectors diff --git a/epochX/cudacpp/gq_ttq.mad/Source/MODEL/couplings.f b/epochX/cudacpp/gq_ttq.mad/Source/MODEL/couplings.f index f3b620ab58..04d6bb5333 100644 --- a/epochX/cudacpp/gq_ttq.mad/Source/MODEL/couplings.f +++ b/epochX/cudacpp/gq_ttq.mad/Source/MODEL/couplings.f @@ -10,18 +10,27 @@ SUBROUTINE COUP() PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + LOGICAL UPDATELOOP COMMON /TO_UPDATELOOP/UPDATELOOP INCLUDE 'input.inc' - INCLUDE '../vector.inc' + + INCLUDE 'coupl.inc' READLHA = .TRUE. 
INCLUDE 'intparam_definition.inc' CALL COUP1() + IF (UPDATELOOP) THEN + + CALL COUP2() + + ENDIF + C couplings needed to be evaluated points by points C - CALL COUP2(1) + CALL COUP3(1) RETURN END @@ -47,7 +56,11 @@ SUBROUTINE UPDATE_AS_PARAM(VECID) INCLUDE '../maxparticles.inc' INCLUDE '../cuts.inc' + + INCLUDE '../vector.inc' + + INCLUDE '../run.inc' DOUBLE PRECISION ALPHAS @@ -65,7 +78,7 @@ SUBROUTINE UPDATE_AS_PARAM(VECID) couplings needed to be evaluated points by points C ALL_G(VECID) = G - CALL COUP2(VECID) + CALL COUP3(VECID) RETURN END @@ -80,7 +93,9 @@ SUBROUTINE UPDATE_AS_PARAM2(MU_R2,AS2 ,VECID) INTEGER VECID INCLUDE 'model_functions.inc' INCLUDE 'input.inc' + INCLUDE '../vector.inc' + INCLUDE 'coupl.inc' DOUBLE PRECISION MODEL_SCALE COMMON /MODEL_SCALE/MODEL_SCALE diff --git a/epochX/cudacpp/gq_ttq.mad/Source/MODEL/couplings1.f b/epochX/cudacpp/gq_ttq.mad/Source/MODEL/couplings1.f index e14f3a1770..72cfa0f6e4 100644 --- a/epochX/cudacpp/gq_ttq.mad/Source/MODEL/couplings1.f +++ b/epochX/cudacpp/gq_ttq.mad/Source/MODEL/couplings1.f @@ -7,11 +7,12 @@ SUBROUTINE COUP1( ) IMPLICIT NONE INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' END diff --git a/epochX/cudacpp/gq_ttq.mad/Source/MODEL/couplings2.f b/epochX/cudacpp/gq_ttq.mad/Source/MODEL/couplings2.f index be9e9f5a39..30f3a04e3b 100644 --- a/epochX/cudacpp/gq_ttq.mad/Source/MODEL/couplings2.f +++ b/epochX/cudacpp/gq_ttq.mad/Source/MODEL/couplings2.f @@ -2,18 +2,17 @@ c written by the UFO converter ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc - SUBROUTINE COUP2( VECID) + SUBROUTINE COUP2( ) IMPLICIT NONE - INTEGER VECID + INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' - GC_10(VECID) = -G - GC_11(VECID) = MDL_COMPLEXI*G END diff --git a/epochX/cudacpp/gq_ttq.mad/Source/MODEL/couplings3.f b/epochX/cudacpp/gq_ttq.mad/Source/MODEL/couplings3.f index c77bd60b5a..2d4127fa27 100644 --- a/epochX/cudacpp/gq_ttq.mad/Source/MODEL/couplings3.f +++ b/epochX/cudacpp/gq_ttq.mad/Source/MODEL/couplings3.f @@ -7,12 +7,13 @@ SUBROUTINE COUP3( VECID) IMPLICIT NONE INTEGER VECID INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' GC_10(VECID) = -G GC_11(VECID) = MDL_COMPLEXI*G diff --git a/epochX/cudacpp/gq_ttq.mad/Source/MODEL/makefile b/epochX/cudacpp/gq_ttq.mad/Source/MODEL/makefile index 0b24445a53..5a5681d220 100644 --- a/epochX/cudacpp/gq_ttq.mad/Source/MODEL/makefile +++ b/epochX/cudacpp/gq_ttq.mad/Source/MODEL/makefile @@ -3,7 +3,7 @@ # Makefile for model library # # ---------------------------------------------------------------------------- - +# template models/template_files/makefile_madevent # Check for ../make_opts ifeq ($(wildcard ../make_opts), ../make_opts) include ../make_opts diff --git a/epochX/cudacpp/gq_ttq.mad/Source/MODEL/makeinc.inc b/epochX/cudacpp/gq_ttq.mad/Source/MODEL/makeinc.inc index 6e2743eac1..4a9e1b2cc5 100644 --- a/epochX/cudacpp/gq_ttq.mad/Source/MODEL/makeinc.inc +++ b/epochX/cudacpp/gq_ttq.mad/Source/MODEL/makeinc.inc @@ -2,4 +2,4 @@ # written by the UFO converter 
############################################################################# -MODEL = couplings.o lha_read.o printout.o rw_para.o model_functions.o couplings1.o couplings2.o \ No newline at end of file +MODEL = couplings.o lha_read.o printout.o rw_para.o model_functions.o couplings1.o couplings2.o couplings3.o \ No newline at end of file diff --git a/epochX/cudacpp/gq_ttq.mad/Source/MODEL/printout.f b/epochX/cudacpp/gq_ttq.mad/Source/MODEL/printout.f index 18b8f35b08..3e195b03a2 100644 --- a/epochX/cudacpp/gq_ttq.mad/Source/MODEL/printout.f +++ b/epochX/cudacpp/gq_ttq.mad/Source/MODEL/printout.f @@ -1,3 +1,7 @@ +ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc +c written by the UFO converter +ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc + c************************************************************************ c** ** c** MadGraph/MadEvent Interface to FeynRules ** @@ -9,7 +13,7 @@ subroutine printout implicit none - include '../vector.inc' ! defines VECSIZE_MEMMAX + include '../vector.inc' ! VECSIZE_MEMMAX (needed by coupl.inc) include 'coupl.inc' ! needs VECSIZE_MEMMAX (defined in vector.inc) include 'input.inc' diff --git a/epochX/cudacpp/gq_ttq.mad/Source/PDF/eepdf.inc b/epochX/cudacpp/gq_ttq.mad/Source/PDF/eepdf.inc index a0183e49ee..d50d8c62b3 100644 --- a/epochX/cudacpp/gq_ttq.mad/Source/PDF/eepdf.inc +++ b/epochX/cudacpp/gq_ttq.mad/Source/PDF/eepdf.inc @@ -4,6 +4,9 @@ integer n_ee parameter (n_ee = 4) ! arrays to store the components before combining them + ! note this common is very quickly overwritten do not use + ! use such common outside of auto_dsig.f double precision ee_components(n_ee) common / to_ee_components / ee_components + diff --git a/epochX/cudacpp/gq_ttq.mad/Source/PDF/pdfwrap_emela.f b/epochX/cudacpp/gq_ttq.mad/Source/PDF/pdfwrap_emela.f index bce10819d5..da1e4e2276 100644 --- a/epochX/cudacpp/gq_ttq.mad/Source/PDF/pdfwrap_emela.f +++ b/epochX/cudacpp/gq_ttq.mad/Source/PDF/pdfwrap_emela.f @@ -13,7 +13,7 @@ SUBROUTINE PDFWRAP DOUBLE PRECISION VALUE(20) REAL*8 ALPHASPDF EXTERNAL ALPHASPDF - ! PDFs with beamstrahlung use specific initialisation/evaluation +C PDFs with beamstrahlung use specific initialisation/evaluation LOGICAL HAS_BSTRAHL COMMON /TO_HAS_BS/ HAS_BSTRAHL diff --git a/epochX/cudacpp/gq_ttq.mad/Source/PDF/pdg2pdf.f b/epochX/cudacpp/gq_ttq.mad/Source/PDF/pdg2pdf.f index 46f321e66b..2e7343a69b 100644 --- a/epochX/cudacpp/gq_ttq.mad/Source/PDF/pdg2pdf.f +++ b/epochX/cudacpp/gq_ttq.mad/Source/PDF/pdg2pdf.f @@ -108,7 +108,7 @@ double precision function pdg2pdf(ih,ipdg,beamid,x,xmu) else if (abs(ipart).eq.10) then ipart = sign(1,ipart) * 15 endif - pdg2pdf = 0d0 + pdg2pdf = 0d0 if (beamid.lt.0) then ih_local = ipart @@ -122,7 +122,7 @@ double precision function pdg2pdf(ih,ipdg,beamid,x,xmu) endif do i_ee = 1, n_ee ee_components(i_ee) = compute_eepdf(x,omx_ee(iabs(beamid)),xmu,i_ee,ipart,ih_local) - enddo + enddo pdg2pdf = ee_components(1) ! 
temporary to test pdf load c write(*,*), x, beamid ,omx_ee(iabs(beamid)),xmu,1,ipart,ih_local,pdg2pdf return diff --git a/epochX/cudacpp/gq_ttq.mad/Source/dsample.f b/epochX/cudacpp/gq_ttq.mad/Source/dsample.f index 09d482b45a..b0bc0547ad 100644 --- a/epochX/cudacpp/gq_ttq.mad/Source/dsample.f +++ b/epochX/cudacpp/gq_ttq.mad/Source/dsample.f @@ -204,6 +204,9 @@ subroutine sample_full(ndim,ncall,itmax,itmin,dsig,ninvar,nconfigs,VECSIZE_USED) if (VECSIZE_USED.le.1) then all_fx(1) = dsig(all_p, all_wgt,0) + ivec=0 + ilock=0 + iwarp=1 else c Here "i" is the position in the full grid of the event do i=(iwarp-1)*WARP_SIZE+1, iwarp*warp_size diff --git a/epochX/cudacpp/gq_ttq.mad/Source/eepdf.inc b/epochX/cudacpp/gq_ttq.mad/Source/eepdf.inc index a0183e49ee..d50d8c62b3 100644 --- a/epochX/cudacpp/gq_ttq.mad/Source/eepdf.inc +++ b/epochX/cudacpp/gq_ttq.mad/Source/eepdf.inc @@ -4,6 +4,9 @@ integer n_ee parameter (n_ee = 4) ! arrays to store the components before combining them + ! note this common is very quickly overwritten do not use + ! use such common outside of auto_dsig.f double precision ee_components(n_ee) common / to_ee_components / ee_components + diff --git a/epochX/cudacpp/gq_ttq.mad/Source/genps.inc b/epochX/cudacpp/gq_ttq.mad/Source/genps.inc index af7e0efbce..ef78e7e812 100644 --- a/epochX/cudacpp/gq_ttq.mad/Source/genps.inc +++ b/epochX/cudacpp/gq_ttq.mad/Source/genps.inc @@ -29,9 +29,8 @@ c parameter (max_host=99,maxplace=199,maxpoints=100,maxans=50) c************************************************************************* c Parameters for helicity sums in matrixN.f c************************************************************************* - REAL*8 LIMHEL -c PARAMETER(LIMHEL=1e-8) ! ME threshold for helicity filtering (Fortran default) - PARAMETER(LIMHEL=0) ! ME threshold for helicity filtering (force Fortran to mimic cudacpp, see #419) +c REAL*8 LIMHEL +c PARAMETER(LIMHEL=1e-8) -> pass in the run_card.dat INTEGER MAXTRIES PARAMETER(MAXTRIES=25) C To pass the helicity configuration chosen by the DiscreteSampler to diff --git a/epochX/cudacpp/gq_ttq.mad/Source/run.inc b/epochX/cudacpp/gq_ttq.mad/Source/run.inc index 5433a23583..81c8df6bf2 100644 --- a/epochX/cudacpp/gq_ttq.mad/Source/run.inc +++ b/epochX/cudacpp/gq_ttq.mad/Source/run.inc @@ -106,4 +106,7 @@ c double precision tmin_for_channel integer sde_strat ! 
1 means standard single diagram enhancement strategy, c 2 means approximation by the denominator of the propagator - common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat \ No newline at end of file + common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat +c + double precision limhel + common/to_limhel/limhel diff --git a/epochX/cudacpp/gq_ttq.mad/Source/run_card.inc b/epochX/cudacpp/gq_ttq.mad/Source/run_card.inc index 67af0f2051..1a1bc782bd 100644 --- a/epochX/cudacpp/gq_ttq.mad/Source/run_card.inc +++ b/epochX/cudacpp/gq_ttq.mad/Source/run_card.inc @@ -88,6 +88,8 @@ DSQRT_SHAT = 0.000000000000000D+00 + LIMHEL = 0.000000000000000D+00 + PTJ = 2.000000000000000D+01 PTB = 0.000000000000000D+00 diff --git a/epochX/cudacpp/gq_ttq.mad/Source/setrun.f b/epochX/cudacpp/gq_ttq.mad/Source/setrun.f index 9e9ef7fdbd..dc6caef748 100644 --- a/epochX/cudacpp/gq_ttq.mad/Source/setrun.f +++ b/epochX/cudacpp/gq_ttq.mad/Source/setrun.f @@ -162,9 +162,21 @@ subroutine setrun C Fill common block for Les Houches init info do i=1,2 if(lpp(i).eq.1.or.lpp(i).eq.2) then - idbmup(i)=2212 + if (nb_proton(i).eq.1.and.nb_neutron(i).eq.0) then + idbmup(i)=2212 + elseif (nb_proton(i).eq.0.and.nb_neutron(i).eq.1) then + idbmup(i)=2112 + else + idbmup(i) = 1000000000 + (nb_proton(i)+nb_neutron(i))*10 + $ + nb_proton(i)*10000 + endif elseif(lpp(i).eq.-1.or.lpp(i).eq.-2) then - idbmup(i)=-2212 + if (nb_proton(i).eq.1.and.nb_neutron(i).eq.0) then + idbmup(i)=-2212 + else + idbmup(i) = -1*(1000000000 + (nb_proton(i)+nb_neutron(i))*10 + $ + nb_proton(i)*10000) + endif elseif(lpp(i).eq.3) then idbmup(i)=11 elseif(lpp(i).eq.-3) then diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/MGVersion.txt b/epochX/cudacpp/gq_ttq.mad/SubProcesses/MGVersion.txt index 9d3a5c0ba0..a806d3938c 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/MGVersion.txt +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/MGVersion.txt @@ -1 +1 @@ -3.5.3_lo_vect \ No newline at end of file +3.6.0_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc index f0179d1f0d..03174bc366 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.h b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.h index 5200762ff8..9ee06a68ef 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.h +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 
3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f index 3a34c54b34..eb78c27eb4 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1137,11 +1137,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=32) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1151,32 +1152,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=32) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=32) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f index 5f8fcf53e4..d06b3bf8c7 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -104,6 +104,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -232,7 +234,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -300,7 +302,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -339,9 +341,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -358,6 +361,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -529,11 +534,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -633,9 +633,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -764,3 +766,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/driver.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/driver.f index c45686a3b2..c2eadb2c31 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/driver.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
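The genps.inc, run.inc and run_card.inc hunks earlier in this patch move LIMHEL from a hard-coded Fortran PARAMETER to a run_card setting shared through the to_limhel common block; the matrix1.f hunks just below keep a helicity only if its matrix element passes that threshold. A hedged Python sketch of the filter, using the Fortran convention DABS(TS(I)) > ANS*LIMHEL/NCOMB and invented numbers (run_card.inc above writes LIMHEL = 0, which keeps every nonzero helicity; banner.py's hidden default is 1e-8):

```python
# Sketch of the good-helicity filter; values are invented for illustration.
NCOMB = 32                      # helicity combinations for this process
limhel = 1e-8                   # assumed run_card value (0 disables filtering)

def good_helicities(ts, ans, limhel=limhel):
    """Return the 1-based helicity indices whose |ME| exceeds ans*limhel/NCOMB."""
    return [i + 1 for i, t in enumerate(ts) if abs(t) > ans * limhel / NCOMB]

ts = [0.0] * NCOMB
ts[4] = 1.2e-3                  # pretend only helicity 5 contributes
ans = sum(ts)
assert good_helicities(ts, ans) == [5]
```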
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f index 1fd572a5a8..42e162b388 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -26,6 +26,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=32) INTEGER NGRAPHS @@ -49,8 +51,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -59,26 +61,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -89,7 +88,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -99,26 +97,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,5) /-1, 1,-1, 1,-1/ DATA (NHEL(I, 2),I=1,5) /-1, 1,-1, 1, 1/ DATA (NHEL(I, 3),I=1,5) /-1, 1,-1,-1,-1/ @@ -162,8 +159,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -172,11 +168,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=4 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=4 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -186,16 +182,16 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) - ! handling only one beam polarization here. Second beam can be handle via the pdf. +C handling only one beam polarization here. Second beam can +C be handle via the pdf. IF(POL(2).NE.1D0.AND.NHEL(2,I).EQ.INT(SIGN(1D0,POL(2)))) $ THEN T=T*ABS(POL(2)) @@ -210,7 +206,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -239,35 +236,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -280,7 +275,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) - ! handling only one beam polarization here. Second beam can be handle via the pdf. +C handling only one beam polarization here. Second beam can be +C handle via the pdf. IF(POL(2).NE.1D0.AND.NHEL(2,I).EQ.INT(SIGN(1D0,POL(2)))) THEN T=T*ABS(POL(2)) ELSE IF(POL(2).NE.1D0)THEN @@ -345,7 +341,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc index bacd28895f..0e0543d09a 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.h b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.h index 99ec68e177..01422c303b 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.h +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f index 995579a722..eb8a8e76e4 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1137,11 +1137,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=32) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1151,32 +1152,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=32) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=32) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f index 27ef220e77..d02ea2f3b3 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -104,6 +104,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -232,7 +234,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -300,7 +302,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! 
random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -339,9 +341,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -358,6 +361,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -529,11 +534,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -633,9 +633,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -764,3 +766,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/driver.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/driver.f index c45686a3b2..c2eadb2c31 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/driver.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
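As in P1_gu_ttxu, the WRITE_GOOD_HEL / READ_GOOD_HEL / INIT_GOOD_HEL hunks above replace the hard-coded size-2, mirror-indexed GOODHEL and NTRY arrays with MAXSPROC-sized ones from maxamps.inc, and swap the explicit DO loops for whole-array assignments. A small Python sketch of the reshaped state, with an assumed MAXSPROC (the real value comes from maxamps.inc):

```python
# Sketch of the reshaped common-block state; MAXSPROC = 2 is an assumption
# for illustration. The arrays are now indexed by subprocess, not by IMIRROR.
MAXSPROC = 2
NCOMB = 32
MAXTRIES = 25                    # as in genps.inc

# INIT_GOOD_HEL: GOODHEL(:,:) = .FALSE. ; NTRY(:) = 0
goodhel = [[False] * MAXSPROC for _ in range(NCOMB)]
ntry = [0] * MAXSPROC

# READ_GOOD_HEL: after reading a saved table, NTRY(:) = MAXTRIES + 1 marks
# every subprocess as already past its helicity-exploration phase.
ntry = [MAXTRIES + 1] * MAXSPROC
assert all(n > MAXTRIES for n in ntry)
```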
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f index b428147f4e..d2886d52ab 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -26,6 +26,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=32) INTEGER NGRAPHS @@ -49,8 +51,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -59,26 +61,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -89,7 +88,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -99,26 +97,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,5) /-1,-1,-1, 1, 1/ DATA (NHEL(I, 2),I=1,5) /-1,-1,-1, 1,-1/ DATA (NHEL(I, 3),I=1,5) /-1,-1,-1,-1, 1/ @@ -162,8 +159,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -172,11 +168,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=4 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=4 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -186,16 +182,16 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) - ! handling only one beam polarization here. Second beam can be handle via the pdf. +C handling only one beam polarization here. Second beam can +C be handle via the pdf. IF(POL(2).NE.1D0.AND.NHEL(2,I).EQ.INT(SIGN(1D0,POL(2)))) $ THEN T=T*ABS(POL(2)) @@ -210,7 +206,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -239,35 +236,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -280,7 +275,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) - ! handling only one beam polarization here. Second beam can be handle via the pdf. +C handling only one beam polarization here. Second beam can be +C handle via the pdf. IF(POL(2).NE.1D0.AND.NHEL(2,I).EQ.INT(SIGN(1D0,POL(2)))) THEN T=T*ABS(POL(2)) ELSE IF(POL(2).NE.1D0)THEN @@ -345,7 +341,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cluster.f b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cluster.f index 649e46f4e9..b8995283ed 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/cluster.f +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/cluster.f @@ -552,6 +552,8 @@ logical function cluster(p, ivec) if (btest(mlevel,1)) $ write (*,*)'New event' + iwin = 0 + jwin = 0 cluster=.false. clustered=.false. 
do i=0,3 @@ -663,7 +665,8 @@ logical function cluster(p, ivec) c initialize graph storage igraphs(0)=0 nleft=nexternal -c cluster +c cluster + if (iwin.eq.0.or.jwin.eq.0) stop 21 do n=1,nexternal-2 c combine winner imocl(n)=imap(iwin,2)+imap(jwin,2) diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/proc_characteristics b/epochX/cudacpp/gq_ttq.mad/SubProcesses/proc_characteristics index 76a6154ffb..14c5bc5d13 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/proc_characteristics +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/proc_characteristics @@ -17,6 +17,8 @@ splitting_types = [] perturbation_order = [] limitations = [] + ew_sudakov = False hel_recycling = False single_color = False nlo_mixed_expansion = True + gauge = unitary diff --git a/epochX/cudacpp/gq_ttq.mad/SubProcesses/refine.sh b/epochX/cudacpp/gq_ttq.mad/SubProcesses/refine.sh index afb9b99ad1..b46170ba23 100644 --- a/epochX/cudacpp/gq_ttq.mad/SubProcesses/refine.sh +++ b/epochX/cudacpp/gq_ttq.mad/SubProcesses/refine.sh @@ -57,7 +57,11 @@ j=%(directory)s for((try=1;try<=16;try+=1)); do if [ "$keeplog" = true ] ; then + if [[ -e ../madevent ]];then ../madevent 2>&1 >> $k &1 >> $k &1 >> log.txt &1 >> log.txt &1 >> $k \w*)>') - pat_end=re.compile('\w*)>') + pat_begin=re.compile(r'<(?P\w*)>') + pat_end=re.compile(r'\w*)>') tag_to_file={'slha':'param_card.dat', 'mgruncard':'run_card.dat', @@ -319,7 +319,7 @@ def check_pid(self, pid2label): def get_lha_strategy(self): """get the lha_strategy: how the weight have to be handle by the shower""" - if not self["init"]: + if "init" not in self or not self["init"]: raise Exception("No init block define") data = self["init"].split('\n')[0].split() @@ -537,7 +537,8 @@ def charge_card(self, tag): self.param_card = param_card_reader.ParamCard(param_card) return self.param_card elif tag == 'mgruncard': - self.run_card = RunCard(self[tag], unknown_warning=False) + with misc.TMP_variable(RunCard, 'allow_scan', True): + self.run_card = RunCard(self[tag], consistency=False, unknow_warning=False) return self.run_card elif tag == 'mg5proccard': proc_card = self[tag].split('\n') @@ -976,6 +977,8 @@ class ConfigFile(dict): """ a class for storing/dealing with input file. """ + allow_scan = False + def __init__(self, finput=None, **opt): """initialize a new instance. input can be an instance of MadLoopParam, a file, a path to a file, or simply Nothing""" @@ -993,6 +996,7 @@ def __init__(self, finput=None, **opt): # Initialize it with all the default value self.user_set = set() self.auto_set = set() + self.scan_set = {} #key -> type of list (int/float/bool/str/... for scan self.system_only = set() self.lower_to_case = {} self.list_parameter = {} #key -> type of list (int/float/bool/str/... @@ -1109,6 +1113,8 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): #1. check if the parameter is set to auto -> pass it to special if lower_name in self: targettype = type(dict.__getitem__(self, lower_name)) + if lower_name in self.scan_set: + targettype = self.scan_set[lower_name] if targettype != str and isinstance(value, str) and value.lower() == 'auto': self.auto_set.add(lower_name) if lower_name in self.user_set: @@ -1118,13 +1124,26 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): return elif lower_name in self.auto_set: self.auto_set.remove(lower_name) - + + + #1. 
check if the parameter is set to auto -> pass it to special + scan_targettype = None + if self.allow_scan and isinstance(value, str) and value.strip().startswith('scan'): + if lower_name in self.user_set: + self.user_set.remove(lower_name) + self.scan_set[lower_name] = type(self[lower_name]) + dict.__setitem__(self, lower_name, value) + #keep old value. + self.post_set(lower_name,value, change_userdefine, raiseerror) + return + elif lower_name in self.scan_set: + scan_targettype = self.scan_set[lower_name] + del self.scan_set[lower_name] + # 2. Find the type of the attribute that we want if lower_name in self.list_parameter: targettype = self.list_parameter[lower_name] - - if isinstance(value, str): # split for each comma/space value = value.strip() @@ -1248,8 +1267,11 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): if change_userdefine: self.user_set.add(lower_name) return self.post_set(lower_name, None, change_userdefine, raiseerror) - elif name in self: - targettype = type(self[name]) + elif name in self: + if scan_targettype: + targettype = targettype + else: + targettype = type(self[name]) else: logger.debug('Trying to add argument %s in %s. ' % (name, self.__class__.__name__) +\ 'This argument is not defined by default. Please consider adding it.') @@ -1262,7 +1284,7 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): if change_userdefine: self.user_set.add(lower_name) return self.post_set(lower_name, None, change_userdefine, raiseerror) - + value = self.format_variable(value, targettype, name=name) #check that the value is allowed: if lower_name in self.allowed_value and '*' not in self.allowed_value[lower_name]: @@ -1444,7 +1466,7 @@ def format_variable(value, targettype, name="unknown"): value =int(value[:-1]) * convert[value[-1]] elif '/' in value or '*' in value: try: - split = re.split('(\*|/)',value) + split = re.split(r'(\*|/)',value) v = float(split[0]) for i in range((len(split)//2)): if split[2*i+1] == '*': @@ -1482,7 +1504,7 @@ def format_variable(value, targettype, name="unknown"): value = float(value) except ValueError: try: - split = re.split('(\*|/)',value) + split = re.split(r'(\*|/)',value) v = float(split[0]) for i in range((len(split)//2)): if split[2*i+1] == '*': @@ -1491,7 +1513,10 @@ def format_variable(value, targettype, name="unknown"): v /= float(split[2*i+2]) except: v=0 - raise InvalidCmd("%s can not be mapped to a float" % value) + if "scan" in value: + raise InvalidCmd("%s is not supported here. Note that scan command can not be present simultaneously in the run_card and param_card." 
% value) + else: + raise InvalidCmd("%s can not be mapped to a float" % value) finally: value = v else: @@ -1737,10 +1762,12 @@ def default_setup(self): self.add_param('splitting_types',[], typelist=str) self.add_param('perturbation_order', [], typelist=str) self.add_param('limitations', [], typelist=str) + self.add_param('ew_sudakov', False) self.add_param('hel_recycling', False) self.add_param('single_color', True) self.add_param('nlo_mixed_expansion', True) - + self.add_param('gauge', 'U') + def read(self, finput): """Read the input file, this can be a path to a file, a file object, a str with the content of the file.""" @@ -3104,7 +3131,7 @@ def write(self, output_file, template=None, python_template=False, # do not write hidden parameter not hidden for this template # if python_template: - written = written.union(set(re.findall('\%\((\w*)\)s', open(template,'r').read(), re.M))) + written = written.union(set(re.findall(r'\%\((\w*)\)s', open(template,'r').read(), re.M))) to_write = to_write.union(set(self.hidden_param)) to_write = to_write.difference(written) @@ -3157,7 +3184,6 @@ def get_value_from_include(self, path, list_of_params, output_dir): if path does not exists return the current value in self for all parameter""" #WARNING DOES NOT HANDLE LIST/DICT so far - misc.sprint(output_dir, path) # handle case where file is missing if not os.path.exists(pjoin(output_dir,path)): misc.sprint("include file not existing", pjoin(output_dir,path)) @@ -3259,7 +3285,7 @@ def edit_dummy_fct_from_file(self, filelist, outdir): text = open(path,'r').read() #misc.sprint(text) f77_type = ['real*8', 'integer', 'double precision', 'logical'] - pattern = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ + pattern = re.compile(r'^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ % {'type':'|'.join(f77_type)}, re.I+re.M) for fct in pattern.findall(text): fsock = file_writers.FortranWriter(tmp,'w') @@ -3287,6 +3313,7 @@ def edit_dummy_fct_from_file(self, filelist, outdir): starttext = open(pjoin(outdir, path+'.orig')).read() fsock.remove_routine(starttext, to_mod[path][0]) for text in to_mod[path][1]: + text = self.retro_compatible_custom_fct(text) fsock.writelines(text) fsock.close() if not filecmp.cmp(pjoin(outdir, path), pjoin(outdir, path+'.tmp')): @@ -3303,7 +3330,33 @@ def edit_dummy_fct_from_file(self, filelist, outdir): files.mv(pjoin(outdir,path+'.orig'), pjoin(outdir, path)) + @staticmethod + def retro_compatible_custom_fct(lines, mode=None): + f77_type = ['real*8', 'integer', 'double precision', 'logical'] + function_pat = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ + % {'type':'|'.join(f77_type)}, re.I+re.M) + include_pat = re.compile(r"\s+include\s+[\'\"]([\w\./]*)") + + assert isinstance(lines, list) + sol = [] + + if mode is None or 'vector.inc' in mode: + search = True + for i,line in enumerate(lines[:]): + if search and re.search(include_pat, line): + name = re.findall(include_pat, line)[0] + misc.sprint('DETECTED INCLUDE', name) + if 'vector.inc' in name: + search = False + if 'run.inc' in name: + sol.append(" include 'vector.inc'") + search = False + sol.append(line) + if re.search(function_pat, line): + misc.sprint("DETECTED FCT") + search = True + return sol def guess_entry_fromname(self, name, value): """ @@ -3346,7 +3399,7 @@ def update_typelist(value, name, opts): #handle metadata opts = {} forced_opts = [] - for key,val in re.findall("\<(?P[_\-\w]+)\=(?P[^>]*)\>", str(name)): + for key,val in 
re.findall(r"\<(?P[_\-\w]+)\=(?P[^>]*)\>", str(name)): forced_opts.append(key) if val in ['True', 'False']: opts[key] = eval(val) @@ -3681,11 +3734,22 @@ def write_autodef(self, output_dir, output_file=None): out = ["%s\n" %l for l in out] fsock.writelines(out) - @staticmethod - def get_idbmup(lpp): + def get_idbmup(self, lpp, beam=1): """return the particle colliding pdg code""" if lpp in (1,2, -1,-2): - return math.copysign(2212, lpp) + target = 2212 + if 'nb_proton1' in self: + nbp = self['nb_proton%s' % beam] + nbn = self['nb_neutron%s' % beam] + if nbp == 1 and nbn ==0: + target = 2212 + elif nbp==0 and nbn ==1: + target = 2112 + else: + target = 1000000000 + target += 10 * (nbp+nbn) + target += 10000 * nbp + return math.copysign(target, lpp) elif lpp in (3,-3): return math.copysign(11, lpp) elif lpp in (4,-4): @@ -3701,8 +3765,8 @@ def get_banner_init_information(self): the first line of the block of the lhe file.""" output = {} - output["idbmup1"] = self.get_idbmup(self['lpp1']) - output["idbmup2"] = self.get_idbmup(self['lpp2']) + output["idbmup1"] = self.get_idbmup(self['lpp1'], beam=1) + output["idbmup2"] = self.get_idbmup(self['lpp2'], beam=2) output["ebmup1"] = self["ebeam1"] output["ebmup2"] = self["ebeam2"] output["pdfgup1"] = 0 @@ -3959,7 +4023,8 @@ def check_validity(self, card): dict.__setitem__(card, 'pdlabel1', card['pdlabel']) dict.__setitem__(card, 'pdlabel2', card['pdlabel']) - if abs(card['lpp1']) == 1 == abs(card['lpp2']) and card['pdlabel1'] != card['pdlabel2']: + if isinstance(card['lpp1'],int) and isinstance(card['lpp2'],int) and \ + abs(card['lpp1']) == 1 == abs(card['lpp2']) and card['pdlabel1'] != card['pdlabel2']: raise InvalidRunCard("Assymetric beam pdf not supported for proton-proton collision") def status(self, card): @@ -4156,12 +4221,16 @@ def default_setup(self): self.add_param('frame_id', 6, system=True) self.add_param("event_norm", "average", allowed=['sum','average', 'unity'], include=False, sys_default='sum', hidden=True) + self.add_param("keep_log", "normal", include=False, hidden=True, + comment="none: all log send to /dev/null.\n minimal: keep only log for survey of the last run.\n normal: keep only log for survey of all run. \n debug: keep all log (survey and refine)", + allowed=['none', 'minimal', 'normal', 'debug']) #cut self.add_param("auto_ptj_mjj", True, hidden=True) self.add_param("bwcutoff", 15.0) self.add_param("cut_decays", False, cut='d') self.add_param('dsqrt_shat',0., cut=True) self.add_param("nhel", 0, include=False) + self.add_param("limhel", 1e-8, hidden=True, comment="threshold to determine if an helicity contributes when not MC over helicity.") #pt cut self.add_param("ptj", 20.0, cut='j') self.add_param("ptb", 0.0, cut='b') @@ -4842,7 +4911,7 @@ def create_default_for_process(self, proc_characteristic, history, proc_def): # here pick strategy 2 if only one QCD color flow # and for pure multi-jet case jet_id = [21] + list(range(1, self['maxjetflavor']+1)) - if proc_characteristic['single_color']: + if proc_characteristic['gauge'] != 'FD' and proc_characteristic['single_color']: self['sde_strategy'] = 2 #for pure lepton final state go back to sde_strategy=1 pure_lepton=True @@ -5741,9 +5810,10 @@ def check_validity(self): # check that ebeam is bigger than the proton mass. 
for i in [1,2]: - if self['lpp%s' % i ] not in [1,2]: + # do not for proton mass if not proton PDF (or when scan initialization) + if self['lpp%s' % i ] not in [1,2] or isinstance(self['ebeam%i' % i], str): continue - + if self['ebeam%i' % i] < 0.938: if self['ebeam%i' %i] == 0: logger.warning("At-rest proton mode set: energy beam set to 0.938 GeV") @@ -6193,3 +6263,181 @@ def log_and_update(self, banner, card, par, v): xcard = banner.charge_card(card) xcard[par[0]].param_dict[(par[1],)].value = v xcard.write(os.path.join(self.me_dir, 'Cards', '%s.dat' % card)) + + + + +class RunCardIterator(object): + """A class keeping track of the scan: flag in the param_card and + having an __iter__() function to scan over all the points of the scan. + """ + + logging = True + def __init__(self, input_path=None): + with misc.TMP_variable(RunCard, 'allow_scan', True): + self.run_card = RunCard(input_path, consistency=False) + self.run_card.allow_scan = True + + self.itertag = [] #all the current value use + self.cross = [] # keep track of all the cross-section computed + self.param_order = [] + + def __iter__(self): + """generate the next param_card (in a abstract way) related to the scan. + Technically this generates only the generator.""" + + if hasattr(self, 'iterator'): + return self.iterator + self.iterator = self.iterate() + return self.iterator + + def write(self, path): + self.__iter__.write(path) + + def next(self, autostart=False): + """call the next iteration value""" + try: + iterator = self.iterator + except: + if autostart: + iterator = self.__iter__() + else: + raise + try: + out = next(iterator) + except StopIteration: + del self.iterator + raise + return out + + def iterate(self): + """create the actual generator""" + all_iterators = {} # dictionary of key -> block of object to scan [([param, [values]), ...] + pattern = re.compile(r'''scan\s*(?P<id>\d*)\s*:\s*(?P<value>[^#]*)''', re.I) + + # fill all_iterators with the run_card information + for name in self.run_card.scan_set: + value = self.run_card[name] + try: + key, def_list = pattern.findall(value)[0] + except Exception as error: + misc.sprint(error) + raise Exception("Fail to handle scanning tag in run_card: Please check that the syntax is valid") + if key == '': + key = -1 * len(all_iterators) + if key not in all_iterators: + all_iterators[key] = [] + try: + all_iterators[key].append( (name, eval(def_list))) + except SyntaxError as error: + raise Exception("Fail to handle your scan definition. Please check your syntax:\n entry: %s \n Error reported: %s" %(def_list, error)) + + #prepare to keep track of parameter changing for the report + keys = list(all_iterators.keys()) # need to fix an order for the scan + #store the type of parameter + for key in keys: + for param, values in all_iterators[key]: + self.param_order.append("run_card#%s" % (param)) + + # do the loop + lengths = [list(range(len(all_iterators[key][0][1]))) for key in keys] + from functools import reduce + total = reduce((lambda x, y: x * y),[len(x) for x in lengths]) + for i,positions in enumerate(itertools.product(*lengths)): + self.itertag = [] + if self.logging: + logger.info("Create the next run_card in the scan definition (%s/%s) " %( i+1, total), '$MG:BOLD') + for i, pos in enumerate(positions): + key = keys[i] + for param, values in all_iterators[key]: + # assign the value in the card.
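# --- editor's sketch (not part of the generated diff) ------------------------
# RunCardIterator.iterate() above parses values of the form "scan:[...]" or
# "scanN:[...]" and then takes the cartesian product of the independent scan
# groups; entries sharing the same N tag vary together. A minimal stand-alone
# illustration of that parsing/looping logic (the group names 'id' and
# 'value' and the example card values are assumptions for this sketch):
import itertools, re

pattern = re.compile(r'scan\s*(?P<id>\d*)\s*:\s*(?P<value>[^#]*)', re.I)

cards = {'ebeam1': 'scan1:[3500., 6500.]',
         'ebeam2': 'scan1:[3500., 6500.]',    # tied to ebeam1 via the "1" tag
         'ptj':    'scan:[10., 20., 30.]'}    # untagged: independent group

groups = {}
for name, value in cards.items():
    key, def_list = pattern.findall(value)[0]
    key = key or '-%d' % len(groups)          # untagged scans get a unique key
    groups.setdefault(key, []).append((name, eval(def_list)))

keys = sorted(groups)
lengths = [range(len(groups[k][0][1])) for k in keys]
for positions in itertools.product(*lengths):
    point = {name: values[pos] for k, pos in zip(keys, positions)
                               for name, values in groups[k]}
    print(point)                              # 2 x 3 = 6 run_card configurations
# ------------------------------------------------------------------------------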
+ self.run_card[param] = values[pos] + self.itertag.append(values[pos]) + if self.logging: + logger.info("change parameter %s to %s", \ + param, values[pos]) + + + # retrun the current param_card up to next iteration + yield self.run_card + + + def store_entry(self, run_name, cross, error=None, run_card_path=None): + """store the value of the cross-section""" + + if isinstance(cross, dict): + info = dict(cross) + info.update({'bench' : self.itertag, 'run_name': run_name}) + self.cross.append(info) + else: + if error is None: + self.cross.append({'bench' : self.itertag, 'run_name': run_name, 'cross(pb)':cross}) + else: + self.cross.append({'bench' : self.itertag, 'run_name': run_name, 'cross(pb)':cross, 'error(pb)':error}) + + + def write_summary(self, path, order=None, lastline=False, nbcol=20): + """ """ + + if path: + ff = open(path, 'w') + path_events = path.rsplit("/", 1)[0] + #identCard = open(pjoin(path.rsplit("/", 2)[0], "Cards", "ident_card.dat")) + #identLines = identCard.readlines() + #identCard.close() + else: + ff = StringIO.StringIO() + if order: + keys = order + else: + keys = list(self.cross[0].keys()) + if 'bench' in keys: keys.remove('bench') + if 'run_name' in keys: keys.remove('run_name') + keys.sort() + if 'cross(pb)' in keys: + keys.remove('cross(pb)') + keys.append('cross(pb)') + if 'error(pb)' in keys: + keys.remove('error(pb)') + keys.append('error(pb)') + + formatting = "#%s%s%s\n" %('%%-%is ' % (nbcol-1), ('%%-%is ' % (nbcol))* len(self.param_order), + ('%%-%is ' % (nbcol))* len(keys)) + # header + if not lastline: + ff.write(formatting % tuple(['run_name'] + self.param_order + keys)) + formatting = "%s%s%s\n" %('%%-%is ' % (nbcol), ('%%-%ie ' % (nbcol))* len(self.param_order), + ('%%-%ie ' % (nbcol))* len(keys)) + + if not lastline: + to_print = self.cross + else: + to_print = self.cross[-1:] + for info in to_print: + name = info['run_name'] + bench = info['bench'] + data = [] + for k in keys: + if k in info: + data.append(info[k]) + else: + data.append(0.) 
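# --- editor's sketch (not part of the generated diff) ------------------------
# write_summary() above builds its format string in two steps: '%%-%is' is
# itself a %-template, so "'%%-%is ' % 20" yields '%-20s ', which is then used
# to format the actual row. A tiny demonstration of the same trick, with
# illustrative column values:
nbcol = 20
header_fmt = '#%s%s\n' % ('%%-%is ' % (nbcol - 1), ('%%-%is ' % nbcol) * 2)
row_fmt    = '%s%s\n'  % ('%%-%is ' % nbcol, ('%%-%ie ' % nbcol) * 2)

print(header_fmt % ('run_name', 'run_card#ebeam1', 'cross(pb)'), end='')
print(row_fmt    % ('run_01_00', 3500.0, 1.2345e-2), end='')
# ------------------------------------------------------------------------------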
+ ff.write(formatting % tuple([name] + bench + data)) + ff_single = open(pjoin(path_events, name, "params.dat"), "w") + for i_bench in range(0, len(bench)): + ff_single.write(self.param_order[i_bench] + " = " + str(bench[i_bench]) +"\n") + ff_single.close() + + if not path: + return ff.getvalue() + + + def get_next_name(self, run_name): + """returns a smart name for the next run""" + + if '_' in run_name: + name, value = run_name.rsplit('_',1) + if value.isdigit(): + return '%s_%02i' % (name, float(value)+1) + # no valid '_' in the name + return '%s_scan_02' % run_name diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/check_param_card.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/check_param_card.py index 71089d7480..bc785b5de6 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/check_param_card.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/check_param_card.py @@ -649,7 +649,7 @@ def write_inc_file(self, outpath, identpath, default, need_mp=False): #check if we need to write the value of scale for some block if os.path.exists(input_inc): text = open(input_inc).read() - scales = list(set(re.findall('mdl__(\w*)__scale', text, re.I))) + scales = list(set(re.findall(r'mdl__(\w*)__scale', text, re.I))) else: scales = [] @@ -1000,10 +1000,12 @@ def iterate(self): self.param_order.append("%s#%s" % (param.lhablock, '_'.join(repr(i) for i in param.lhacode))) # do the loop lengths = [list(range(len(all_iterators[key][0][1]))) for key in keys] - for positions in itertools.product(*lengths): + from functools import reduce + total = reduce((lambda x, y: x * y),[len(x) for x in lengths]) + for i,positions in enumerate(itertools.product(*lengths)): self.itertag = [] if self.logging: - logger.info("Create the next param_card in the scan definition", '$MG:BOLD') + logger.info("Create the next param_card in the scan definition (%s/%s)" % (i+1,total), '$MG:BOLD') for i, pos in enumerate(positions): key = keys[i] for param, values in all_iterators[key]: diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/cluster.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/cluster.py index 9a893f630d..1ad860e04f 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/cluster.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/cluster.py @@ -646,7 +646,10 @@ def worker(self): if os.path.exists(exe) and not exe.startswith('/'): exe = './' + exe if isinstance(opt['stdout'],str): - opt['stdout'] = open(opt['stdout'],'w') + if opt['stdout'] == '/dev/null': + opt['stdout'] = os.open(os.devnull, os.O_RDWR) + else: + opt['stdout'] = open(opt['stdout'],'w') if opt['stderr'] == None: opt['stderr'] = subprocess.STDOUT if arg: @@ -671,11 +674,12 @@ def worker(self): self.pids.put(pid) # the function should return 0 if everything is fine # the error message otherwise - returncode = exe(*arg, **opt) - if returncode != 0: - logger.warning("fct %s does not return 0. Stopping the code in a clean way. The error was:\n%s", exe, returncode) + try: + returncode = exe(*arg, **opt) + except Exception as error: + #logger.warning("fct %s does not return 0. Stopping the code in a clean way. 
The error was:\n%s", exe, returncode) self.stoprequest.set() - self.remove("fct %s does not return 0:\n %s" % (exe, returncode)) + self.remove("fct %s does raise %s\n %s" % (exe, error)) except Exception as error: self.fail_msg = sys.exc_info() logger.warning(str(error)) @@ -700,7 +704,7 @@ def worker(self): def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, - log=None, required_output=[], nb_submit=0): + log=None, required_output=[], nb_submit=0, python_opts={}): """submit a job on multicore machine""" # open threads if needed @@ -720,7 +724,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, return tag else: # python function - self.queue.put((tag, prog, argument, {})) + self.queue.put((tag, prog, argument, python_opts)) self.submitted.put(1) return tag @@ -908,6 +912,10 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None else: requirement = '' + if 'cluster_walltime' in self.options and self.options['cluster_walltime']\ + and self.options['cluster_walltime'] != 'None': + requirement+='\n MaxRuntime = %s' % self.options['cluster_walltime'] + if cwd is None: cwd = os.getcwd() if stdout is None: @@ -936,7 +944,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None #Submitting job(s). #Logging submit event(s). #1 job(s) submitted to cluster 2253622. - pat = re.compile("submitted to cluster (\d*)",re.MULTILINE) + pat = re.compile(r"submitted to cluster (\d*)",re.MULTILINE) output = output.decode(errors='ignore') try: id = pat.search(output).groups()[0] @@ -1025,7 +1033,7 @@ def submit2(self, prog, argument=[], cwd=None, stdout=None, stderr=None, #Logging submit event(s). #1 job(s) submitted to cluster 2253622. output = output.decode(errors='ignore') - pat = re.compile("submitted to cluster (\d*)",re.MULTILINE) + pat = re.compile(r"submitted to cluster (\d*)",re.MULTILINE) try: id = pat.search(output).groups()[0] except: @@ -1588,7 +1596,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None output = a.communicate()[0].decode(errors='ignore') #Your job 874511 ("test.sh") has been submitted - pat = re.compile("Your job (\d*) \(",re.MULTILINE) + pat = re.compile(r"Your job (\d*) \(",re.MULTILINE) try: id = pat.search(output).groups()[0] except: @@ -1606,7 +1614,7 @@ def control_one_job(self, id): if not status: return 'F' #874516 0.00000 test.sh alwall qw 03/04/2012 22:30:35 1 - pat = re.compile("^(\d+)\s+[\d\.]+\s+[\w\d\.]+\s+[\w\d\.]+\s+(\w+)\s") + pat = re.compile(r"^(\d+)\s+[\d\.]+\s+[\w\d\.]+\s+[\w\d\.]+\s+(\w+)\s") stat = '' for line in status.stdout.read().decode(errors='ignore').split('\n'): if not line: @@ -1636,7 +1644,7 @@ def control(self, me_dir=None): cmd = 'qstat -s %s' % statusflag status = misc.Popen([cmd], shell=True, stdout=subprocess.PIPE) #874516 0.00000 test.sh alwall qw 03/04/2012 22:30:35 1 - pat = re.compile("^(\d+)") + pat = re.compile(r"^(\d+)") for line in status.stdout.read().decode(errors='ignore').split('\n'): line = line.strip() try: @@ -1715,6 +1723,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None stderr = stdout if log is None: log = '/dev/null' + command = ['sbatch', '-o', stdout, '-J', me_dir, @@ -1726,6 +1735,12 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None command.insert(1, '-p') command.insert(2, self.cluster_queue) + if 'cluster_walltime' in self.options and self.options['cluster_walltime']\ + and self.options['cluster_walltime'] != 'None': + 
command.insert(1, '-t') + command.insert(2, self.options['cluster_walltime']) + + a = misc.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, @@ -1736,7 +1751,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None id = output_arr[3].rstrip() if not id.isdigit(): - id = re.findall('Submitted batch job ([\d\.]+)', ' '.join(output_arr)) + id = re.findall(r'Submitted batch job ([\d\.]+)', ' '.join(output_arr)) if not id or len(id)>1: raise ClusterManagmentError( 'fail to submit to the cluster: \n%s' \ diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/combine_runs.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/combine_runs.py index 4de6b84ec0..b1e8c88eac 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/combine_runs.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/combine_runs.py @@ -20,6 +20,7 @@ from __future__ import absolute_import import math import os +import shutil import re import logging from six.moves import range @@ -117,6 +118,7 @@ def sum_multichannel(self, channel): #Now read in all of the events and write them #back out with the appropriate scaled weight + to_clean = [] fsock = open(pjoin(channel, 'events.lhe'), 'w') wgt = results.axsec / results.nunwgt tot_nevents, nb_file = 0, 0 @@ -129,8 +131,14 @@ def sum_multichannel(self, channel): nw = self.copy_events(fsock, pjoin(path,'events.lhe'), wgt) tot_nevents += nw nb_file += 1 + to_clean.append(path) logger.debug("Combined %s file generating %s events for %s " , nb_file, tot_nevents, channel) - + for path in to_clean: + try: + shutil.rmtree(path) + except Exception as error: + pass + @staticmethod def get_fortran_str(nb): data = '%E' % nb @@ -162,6 +170,7 @@ def copy_events(self, fsock, input, new_wgt): fsock.write(line) old_line = line return nb_evt + def get_channels(self, proc_path): """Opens file symfact.dat to determine all channels""" sympath = os.path.join(proc_path, 'symfact.dat') diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/common_run_interface.py index 9bd9d9cb50..194f0cdfbd 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/common_run_interface.py @@ -23,6 +23,7 @@ import ast import logging import math +import copy import os import re import shutil @@ -181,6 +182,23 @@ def help_add_time_of_flight(self): logger.info(' threshold option allows to change the minimal value required to') logger.info(' a non zero value for the particle (default:1e-12s)') + def help_print_results(self): + logger.info("syntax: print_results [RUN_NAME] [OPTIONS]") + logger.info("-- print the results of the previous run on the screen") + logger.info(" If not RUN_NAME is provided, the information of all run") + logger.info(" are printed one after another.") + logger.info("") + logger.info(" supported options:") + logger.info(" ------------------") + logger.info(" --format=full|short # default is full") + logger.info(" full format contains banner/... 
") + logger.info(" while short is a simple multi-column format (nice for plotting)") + logger.info(" --path=") + logger.info(" allow to write the information to a file.") + logger.info(" --mode=w|a #default is w ") + logger.info(" when the information is printed to a file, you can choose ") + logger.info(" to either overwrite the file if already exists (w mode)") + logger.info(" to append the information at the end of the file (a mode)") class CheckValidForCmd(object): @@ -727,7 +745,7 @@ def __init__(self, me_dir, options, *args, **opts): if not self.proc_characteristics['ninitial']: # Get number of initial states nexternal = open(pjoin(self.me_dir,'Source','nexternal.inc')).read() - found = re.search("PARAMETER\s*\(NINCOMING=(\d)\)", nexternal) + found = re.search(r"PARAMETER\s*\(NINCOMING=(\d)\)", nexternal) self.ninitial = int(found.group(1)) else: self.ninitial = self.proc_characteristics['ninitial'] @@ -989,7 +1007,22 @@ def do_treatcards(self, line, amcatnlo=False): #raise Exception, "%s %s %s" % (sys.path, os.path.exists(pjoin(self.me_dir,'bin','internal', 'ufomodel')), os.listdir(pjoin(self.me_dir,'bin','internal', 'ufomodel'))) import ufomodel as ufomodel zero = ufomodel.parameters.ZERO - if self.proc_characteristics['nlo_mixed_expansion']: + no_width = [] + + if self.proc_characteristics['ew_sudakov']: + # if the sudakov approximation is used, force all particle widths to zero + # unless the complex mass scheme is used + if not self.proc_characteristics['complex_mass_scheme']: + no_width = [p for p in ufomodel.all_particles if p.width != zero] + logger.info('''Setting all particle widths to zero (needed for EW Sudakov approximation).''','$MG:BOLD') + # also, check that the model features the 'ntadpole' parameter, and set it to 1 + try: + param_card['tadpole'].get(1).value = 1. + logger.info('''Setting the value of ntadpole to 1 (needed for EW Sudakov approximation).''','$MG:BOLD') + except KeyError: + logger.warning('''The model has no 'ntadpole' parameter. 
The Sudakov approximation for EW corrections may give wrong results.''') + + elif self.proc_characteristics['nlo_mixed_expansion']: no_width = [p for p in ufomodel.all_particles if (str(p.pdg_code) in pids or str(-p.pdg_code) in pids) and p.width != zero] @@ -1168,17 +1201,17 @@ def detect_card_type(path): 'Begin Minpts', 'gridpack', 'ebeam1', - 'block\s+mw_run', + r'block\s+mw_run', 'BLOCK', 'DECAY', 'launch', 'madspin', - 'transfer_card\.dat', + r'transfer_card\.dat', 'set', 'main:numberofevents', # pythia8, '@MG5aMC skip_analysis', #MA5 --both-- - '@MG5aMC\s*inputs\s*=\s*\*\.(?:hepmc|lhe)', #MA5 --both-- - '@MG5aMC\s*reconstruction_name', # MA5 hadronique + r'@MG5aMC\s*inputs\s*=\s*\*\.(?:hepmc|lhe)', #MA5 --both-- + r'@MG5aMC\s*reconstruction_name', # MA5 hadronique '@MG5aMC', # MA5 hadronique 'run_rivet_later', # Rivet ] @@ -1237,7 +1270,7 @@ def detect_card_type(path): return 'madspin_card.dat' if 'decay' in text: # need to check if this a line like "decay w+" or "set decay" - if re.search("(^|;)\s*decay", fulltext, re.M): + if re.search(r"(^|;)\s*decay", fulltext, re.M): return 'madspin_card.dat' else: return 'reweight_card.dat' @@ -2074,6 +2107,12 @@ def check_multicore(self): # ensure that the run_card is present if not hasattr(self, 'run_card'): self.run_card = banner_mod.RunCard(pjoin(self.me_dir, 'Cards', 'run_card.dat')) + # Below reads the run_card in the LHE file rather than the Cards/run_card + import madgraph.various.lhe_parser as lhe_parser + args_path = list(args) + self.check_decay_events(args_path) + self.run_card = banner_mod.RunCard(lhe_parser.EventFile(args_path[0]).get_banner()['mgruncard']) + # we want to run this in a separate shell to avoid hard f2py crash command = [sys.executable] @@ -2085,6 +2124,12 @@ def check_multicore(self): command.append('--web') command.append('reweight') + ## TV: copy the event file as backup before starting reweighting + event_path = pjoin(self.me_dir, 'Events', self.run_name, 'events.lhe.gz') + event_path_backup = pjoin(self.me_dir, 'Events', self.run_name, 'events_orig.lhe.gz') + if os.path.exists(event_path) and not os.path.exists(event_path_backup): + shutil.copyfile(event_path, event_path_backup) + ######### START SINGLE CORE MODE ############ if self.options['nb_core']==1 or self.run_card['nevents'] < 101 or not check_multicore(self): if self.run_name: @@ -2200,7 +2245,7 @@ def check_multicore(self): cross_sections[key] = value / (nb_event+1) lhe.remove() for key in cross_sections: - if key == 'orig' or key.isdigit(): + if key == 'orig' or (key.isdigit() and not (key[0] == '2')): continue logger.info('%s : %s pb' % (key, cross_sections[key])) return @@ -2461,7 +2506,7 @@ def do_add_time_of_flight(self, line): ############################################################################ def do_print_results(self, line): - """Not in help:Print the cross-section/ number of events for a given run""" + """Print the cross-section/ number of events for a given run""" args = self.split_arg(line) options={'path':None, 'mode':'w', 'format':'full'} @@ -2942,10 +2987,15 @@ def do_rivet(self, line, postprocess=False): #2 Prepare Rivet setup environments rivet_path = self.options['rivet_path'] yoda_path = self.options['yoda_path'] + fastjet_path = subprocess.Popen([self.options['fastjet'], '--prefix'], + stdout = subprocess.PIPE).stdout.read().decode(errors='ignore').strip() + set_env = set_env + "export PATH={0}:$PATH\n".format(pjoin(rivet_path, 'bin')) set_env = set_env + "export PATH={0}:$PATH\n".format(pjoin(yoda_path, 'bin')) set_env = 
set_env + "export LD_LIBRARY_PATH={0}:{1}:$LD_LIBRARY_PATH\n".format(pjoin(rivet_path, 'lib'), pjoin(rivet_path, 'lib64')) set_env = set_env + "export LD_LIBRARY_PATH={0}:{1}:$LD_LIBRARY_PATH\n".format(pjoin(yoda_path, 'lib'), pjoin(yoda_path, 'lib64')) + set_env = set_env + "export LD_LIBRARY_PATH={0}:$LD_LIBRARY_PATH\n".format(pjoin(self.options['hepmc_path'], 'lib')) + set_env = set_env + "export LD_LIBRARY_PATH={0}:$LD_LIBRARY_PATH\n".format(pjoin(fastjet_path, 'lib')) major, minor = sys.version_info[0:2] set_env = set_env + "export PYTHONPATH={0}:{1}:$PYTHONPATH\n".format(pjoin(rivet_path, 'lib', 'python%s.%s' %(major,minor), 'site-packages'),\ pjoin(rivet_path, 'lib64', 'python%s.%s' %(major,minor), 'site-packages')) @@ -4311,8 +4361,8 @@ def complete_compute_widths(self, text, line, begidx, endidx, formatting=True): else: completion = {} completion['options'] = self.list_completion(text, - ['--path=', '--output=', '--min_br=0.\$', '--nlo', - '--precision_channel=0.\$', '--body_decay=']) + ['--path=', '--output=', r'--min_br=0.\$', '--nlo', + r'--precision_channel=0.\$', '--body_decay=']) return self.deal_multiple_categories(completion, formatting) @@ -4821,9 +4871,9 @@ class AskforEditCard(cmd.OneLinePathCompletion): (return False to repeat the question) """ - all_card_name = ['param_card', 'run_card', 'pythia_card', 'pythia8_card', + all_card_name = ['param_card', 'run_card', 'pythia_card', 'pythia8_card', 'fo_analysis_card' 'madweight_card', 'MadLoopParams', 'shower_card', 'rivet_card'] - to_init_card = ['param', 'run', 'madweight', 'madloop', + to_init_card = ['param', 'run', 'madweight', 'madloop', 'fo_analysis', 'shower', 'pythia8','delphes','madspin', 'rivet'] special_shortcut = {} special_shortcut_help = {} @@ -4853,6 +4903,7 @@ def load_default(self): self.has_PY8 = False self.has_delphes = False self.has_rivet = False + self.has_fo_card = False self.paths = {} self.update_block = [] @@ -5058,7 +5109,8 @@ def init_run(self, cards): try: - self.run_card = banner_mod.RunCard(self.paths['run'], consistency='warning') + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + self.run_card = banner_mod.RunCard(self.paths['run'], consistency='warning') except IOError: self.run_card = {} try: @@ -5248,6 +5300,15 @@ def init_delphes(self, cards): self.has_delphes = True return [] + def init_fo_analysis(self, cards): + self.has_fo_card = False + if not self.get_path('FO_analyse', cards): + return [] + self.has_fo_card = True + self.fo_card = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse']) + self.fo_card_def = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse_default']) + return list(self.fo_card.string_vars) + def set_CM_velocity(self, line): """compute sqrts from the velocity in the center of mass frame""" @@ -5508,6 +5569,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed['delphes_card'] = '' if self.has_rivet: allowed['rivet_card'] = '' + if self.has_fo_card: + allowed['fo_card'] = '' elif len(args) == 2: if args[1] == 'run_card': @@ -5532,6 +5595,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed = {'delphes_card':'default'} elif args[1] == 'rivet_card': allowed = {'rivet_card':'default'} + elif args[1] == 'fo_card': + allowed = {'fo_card':'default'} else: allowed = {'value':''} @@ -5539,6 +5604,7 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): start = 1 if args[1] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', 
'MadLoop_card','pythia8_card','delphes_card','plot_card', + 'fo_card', 'madanalysis5_parton_card','madanalysis5_hadron_card', 'rivet_card']: start = 2 @@ -5576,6 +5642,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): categories.append('delphes_card') if self.has_rivet: categories.append('rivet_card') + if self.has_fo_card: + categories.append('fo_card') possibilities['category of parameter (optional)'] = \ self.list_completion(text, categories) @@ -5630,7 +5698,13 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): if 'delphes_card' in allowed: if allowed['delphes_card'] == 'default': opts = ['default', 'atlas', 'cms'] - possibilities['Delphes Card'] = self.list_completion(text, opts) + possibilities['Delphes Card'] = self.list_completion(text, opts) + + if 'fo_card' in allowed: + opts = self.fo_card.string_vars + if allowed['fo_card'] == 'default': + opts.append('default') + possibilities['FO Card'] = self.list_completion(text, opts) if 'value' in list(allowed.keys()): opts = ['default', 'scale'] @@ -5733,21 +5807,28 @@ def do_set(self, line): if args[0] in self.special_shortcut: targettypes , cmd = self.special_shortcut[args[0]] if len(args) != len(targettypes) +1: - logger.warning('shortcut %s requires %s argument' % (args[0], len(targettypes))) - if len(args) < len(targettypes) +1: - return + if len(targettypes) == 1 and args[len(targettypes)].startswith('scan'): + args = args[:len(targettypes)] + [' '.join(args[len(targettypes):])] + targettypes = [str] else: - logger.warning('additional argument will be ignored') + logger.warning('shortcut %s requires %s argument' % (args[0], len(targettypes))) + if len(args) < len(targettypes) +1: + return + else: + logger.warning('additional argument will be ignored') values ={} for i, argtype in enumerate(targettypes): try: - values = {str(i): banner_mod.ConfigFile.format_variable(args[i+1], argtype, args[0])} + values[str(i)] = banner_mod.ConfigFile.format_variable(args[i+1], argtype, args[0]) except ValueError as e: logger.warning("Wrong argument: The entry #%s should be of type %s.", i+1, argtype) return except InvalidCmd as e: - logger.warning(str(e)) - return + if isinstance(args[i+1], str) and args[i+1].startswith('scan'): + values[str(i)] = args[i+1] + else: + logger.warning(str(e)) + return #else: # logger.warning("too many argument for this command") # return @@ -5787,7 +5868,7 @@ def do_set(self, line): if os.path.exists(pythia_path): logger.info('add line QCUT = %s in pythia_card.dat' % args[1]) p_card = open(pythia_path,'r').read() - p_card, n = re.subn('''^\s*QCUT\s*=\s*[\de\+\-\.]*\s*$''', + p_card, n = re.subn(r'''^\s*QCUT\s*=\s*[\de\+\-\.]*\s*$''', ''' QCUT = %s ''' % args[1], \ p_card, flags=(re.M+re.I)) if n==0: @@ -5801,7 +5882,7 @@ def do_set(self, line): if os.path.exists(pythia_path): logger.info('add line SHOWERKT = %s in pythia_card.dat' % args[1].upper()) p_card = open(pythia_path,'r').read() - p_card, n = re.subn('''^\s*SHOWERKT\s*=\s*[default\de\+\-\.]*\s*$''', + p_card, n = re.subn(r'''^\s*SHOWERKT\s*=\s*[default\de\+\-\.]*\s*$''', ''' SHOWERKT = %s ''' % args[1].upper(), \ p_card, flags=(re.M+re.I)) if n==0: @@ -5856,7 +5937,7 @@ def do_set(self, line): pjoin(self.me_dir,'Cards', 'delphes_card.dat')) return - if args[0] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', + if args[0] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', 'fo_card', 'delphes_card','madanalysis5_hadron_card','madanalysis5_parton_card','rivet_card']: if args[1] == 'default': 
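# --- editor's sketch (not part of the generated diff) ------------------------
# Several hunks in this patch wrap RunCard construction in
# misc.TMP_variable(RunCard, 'allow_scan', True): a class attribute is flipped
# only for the duration of a with-block, so 'scan:' values are accepted while
# the card is being read and strict parsing resumes afterwards. A minimal
# stand-in with the same behaviour (the real misc.TMP_variable in MG5aMC may
# differ in details):
class TMP_variable(object):
    """Temporarily set attribute <name> on <obj> to <value>."""
    def __init__(self, obj, name, value):
        self.obj, self.name, self.value = obj, name, value
    def __enter__(self):
        self.old = getattr(self.obj, self.name)
        setattr(self.obj, self.name, self.value)
        return self.obj
    def __exit__(self, exc_type, exc_value, traceback):
        setattr(self.obj, self.name, self.old)  # restored even on exception

class RunCardLike(object):
    allow_scan = False

with TMP_variable(RunCardLike, 'allow_scan', True):
    assert RunCardLike.allow_scan        # 'scan:' syntax tolerated here
assert not RunCardLike.allow_scan        # back to strict parsing afterwards
# ------------------------------------------------------------------------------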
@@ -6176,6 +6257,22 @@ def do_set(self, line): self.setRivet(args[start], value, default=default) self.rivet_card.write(self.paths['rivet'], self.paths['rivet_default']) + elif self.has_fo_card and (card in ['', 'fo_card'])\ + and args[start].lower() in [k.lower() for k in self.fo_card.string_vars]: + + if args[start] in self.conflict and card == '': + text = 'ambiguous name (present in more than one card). Please specify which card to edit' + logger.warning(text) + return + if args[start+1] == 'default': + value = self.fo_card_default[args[start]] + default = True + else: + value = args[start+1] + default = False + self.fo_card[args[start]] = value + self.modified_card.add('fo_card') + #INVALID -------------------------------------------------------------- else: logger.warning('invalid set command %s ' % line) @@ -6222,12 +6319,13 @@ def setM(self, block, name, value): def setR(self, name, value): - if self.mother_interface.inputfile: - self.run_card.set(name, value, user=True, raiseerror=True) - else: - self.run_card.set(name, value, user=True) - new_value = self.run_card.get(name) - logger.info('modify parameter %s of the run_card.dat to %s' % (name, new_value),'$MG:BOLD') + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + if self.mother_interface.inputfile: + self.run_card.set(name, value, user=True, raiseerror=True) + else: + self.run_card.set(name, value, user=True) + new_value = self.run_card.get(name) + logger.info('modify parameter %s of the run_card.dat to %s' % (name, new_value),'$MG:BOLD') def setML(self, name, value, default=False): @@ -6314,6 +6412,7 @@ def check_card_consistency(self): proc_charac = self.mother_interface.proc_characteristics if proc_charac['grouped_matrix'] and \ + isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int) and \ abs(self.run_card['lpp1']) == 1 == abs(self.run_card['lpp2']) and \ (self.run_card['nb_proton1'] != self.run_card['nb_proton2'] or self.run_card['nb_neutron1'] != self.run_card['nb_neutron2'] or @@ -6403,41 +6502,42 @@ def check_card_consistency(self): # check that only quark/gluon/photon are in initial beam if lpp=+-1 pdg_in_p = list(range(-6,7))+[21,22] - if (abs(self.run_card['lpp1'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial1'])) \ + if isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int): + if(abs(self.run_card['lpp1'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial1'])) \ or (abs(self.run_card['lpp2'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial2'])): - if 'pythia_card.dat' in self.cards or 'pythia8_card.dat' in self.cards: - path_to_remove = None - if 'pythia_card.dat' in self.cards: - path_to_remove = self.paths['pythia'] - card_to_remove = 'pythia_card.dat' - elif 'pythia8_card.dat' in self.cards: - path_to_remove = self.paths['pythia8'] - card_to_remove = 'pythia8_card.dat' - if path_to_remove: - if 'partonshower' in self.run_card['bypass_check']: + if 'pythia_card.dat' in self.cards or 'pythia8_card.dat' in self.cards: + path_to_remove = None + if 'pythia_card.dat' in self.cards: + path_to_remove = self.paths['pythia'] + card_to_remove = 'pythia_card.dat' + elif 'pythia8_card.dat' in self.cards: + path_to_remove = self.paths['pythia8'] + card_to_remove = 'pythia8_card.dat' + if path_to_remove: + if 'partonshower' in self.run_card['bypass_check']: + logger.warning("forcing to keep parton-shower run while possibly not fully consistent... 
please be carefull") + else: + logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.') + os.remove(path_to_remove) + self.cards.remove(card_to_remove) + else: + logger.info('Remember that Parton-Shower are not yet ready for such proton component definition (HW implementation in progress).', '$MG:BOLD' ) + elif (abs(self.run_card['lpp1'])==3 and abs(self.run_card['lpp2'])==3): + if 'pythia8_card.dat' in self.cards: + if self.run_card['pdlabel'] == 'isronlyll': + if 'partonshower' not in self.run_card['bypass_check']: + # force that QED shower is on? + for param in ['TimeShower:QEDshowerByQ', 'TimeShower:QEDshowerByL', 'TimeShower:QEDshowerByGamma', 'SpaceShower:QEDshowerByQ', 'SpaceShower:QEDshowerByL']: + if param not in self.PY8Card or \ + (not self.PY8Card[param] and param.lower() not in self.PY8Card.user_set): + logger.warning('Activating QED shower: setting %s to True', param) + self.PY8Card[param] = True + elif 'partonshower' in self.run_card['bypass_check']: logger.warning("forcing to keep parton-shower run while possibly not fully consistent... please be carefull") - else: + else: logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.') - os.remove(path_to_remove) - self.cards.remove(card_to_remove) - else: - logger.info('Remember that Parton-Shower are not yet ready for such proton component definition (HW implementation in progress).', '$MG:BOLD' ) - elif (abs(self.run_card['lpp1'])==3 and abs(self.run_card['lpp2'])==3): - if 'pythia8_card.dat' in self.cards: - if self.run_card['pdlabel'] == 'isronlyll': - if 'partonshower' not in self.run_card['bypass_check']: - # force that QED shower is on? - for param in ['TimeShower:QEDshowerByQ', 'TimeShower:QEDshowerByL', 'TimeShower:QEDshowerByGamma', 'SpaceShower:QEDshowerByQ', 'SpaceShower:QEDshowerByL']: - if param not in self.PY8Card or \ - (not self.PY8Card[param] and param.lower() not in self.PY8Card.user_set): - logger.warning('Activating QED shower: setting %s to True', param) - self.PY8Card[param] = True - elif 'partonshower' in self.run_card['bypass_check']: - logger.warning("forcing to keep parton-shower run while possibly not fully consistent... please be carefull") - else: - logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.') - os.remove(self.paths['pythia8']) - self.cards.remove('pythia8_card.dat') + os.remove(self.paths['pythia8']) + self.cards.remove('pythia8_card.dat') ######################################################################## @@ -6514,7 +6614,8 @@ def check_card_consistency(self): #check relation between lepton PDF // dressed lepton collisions // ... 
- if abs(self.run_card['lpp1']) != 1 or abs(self.run_card['lpp2']) != 1: + if isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int) and \ + abs(self.run_card['lpp1']) != 1 or abs(self.run_card['lpp2']) != 1: if abs(self.run_card['lpp1']) == abs(self.run_card['lpp2']) == 3: # this can be dressed lepton or photon-flux if proc_charac['pdg_initial1'] in [[11],[-11]] and proc_charac['pdg_initial2'] in [[11],[-11]]: @@ -6732,7 +6833,11 @@ def write_card_param(self): """ write the param_card """ self.param_card.write(self.paths['param']) - + + def write_card_fo_card(self): + """ write the fo_card""" + self.fo_card.write_card_from_template(self.paths['FO_analyse'], self.paths['FO_analyse_default']) + @staticmethod def update_dependent(mecmd, me_dir, param_card, path ,timer=0, run_card=None, lhapdfconfig=None): @@ -7076,7 +7181,7 @@ def do_decay(self, line): #first find the particle particle = line.split('>')[0].strip() logger.info("change madspin_card to define the decay of %s: %s" %(particle, line.strip()), '$MG:BOLD') - particle = particle.replace('+','\+').replace('-','\-') + particle = particle.replace('+',r'\+').replace('-',r'\-') decay_pattern = re.compile(r"^\s*decay\s+%s\s*>[\s\w+-~]*?$" % particle, re.I+re.M) text= open(path).read() text = decay_pattern.sub('', text) @@ -7193,7 +7298,7 @@ def help_edit(self, prefix=True): logger.info( ' --clean remove all previously existing line in the file') logger.info( ' --comment_line="" comment all lines matching the regular expression') logger.info('') - logger.info(' Note: all regular-expression will be prefixed by ^\s*') + logger.info(r' Note: all regular-expression will be prefixed by ^\s*') logger.info('') logger.info( ' example: edit reweight --after_line="change mode\b" change model heft') logger.info( ' edit madspin --after_line="banner" change model XXXX') @@ -7314,7 +7419,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern=r'''replace_line=(?P<quote>["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[14:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[14:-1] for posline,l in enumerate(split): if re.search(pattern, l): break @@ -7344,7 +7449,7 @@ text = open(path).read() split = text.split('\n') search_pattern=r'''comment_line=(?P<quote>["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[14:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[14:-1] nb_mod = 0 for posline,l in enumerate(split): if re.search(pattern, l): @@ -7366,7 +7471,7 @@ text = open(path).read() split = text.split('\n') search_pattern=r'''before_line=(?P<quote>["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[13:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[13:-1] for posline,l in enumerate(split): if re.search(pattern, l): break @@ -7383,7 +7488,7 @@ text = open(path).read() split = text.split('\n') search_pattern = r'''after_line=(?P<quote>["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[12:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[12:-1] for posline,l in enumerate(split): if re.search(pattern, l): break @@ -7527,16 +7632,19 @@ def open_file(self, answer): answer = 'plot' else: answer = self.cards[int(answer)-self.integer_bias] - + path = '' if 'madweight' in answer: answer = answer.replace('madweight', 'MadWeight') elif
'MadLoopParams' in answer: answer = self.paths['ML'] elif 'pythia8_card' in answer: answer = self.paths['pythia8'] + elif 'FO_analyse' in answer: + path = self.paths['FO_analyse'] + answer = 'fo_card' if os.path.exists(answer): path = answer - else: + elif not os.path.exists(path): if not '.dat' in answer and not '.lhco' in answer: if answer != 'trigger': path = self.paths[answer] @@ -7595,7 +7703,8 @@ def reload_card(self, path): logger.error('Please re-open the file and fix the problem.') logger.warning('using the \'set\' command without opening the file will discard all your manual change') elif path == self.paths['run']: - self.run_card = banner_mod.RunCard(path) + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + self.run_card = banner_mod.RunCard(path) elif path == self.paths['shower']: self.shower_card = shower_card_mod.ShowerCard(path) elif path == self.paths['ML']: @@ -7614,6 +7723,8 @@ def reload_card(self, path): except: import internal.madweight.Cards as mwcards self.mw_card = mwcards.Card(path) + elif path == self.paths['FO_analyse']: + self.fo_card = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse']) else: logger.debug('not keep in sync: %s', path) return path @@ -7629,6 +7740,9 @@ def scanparamcardhandling(input_path=lambda obj: pjoin(obj.me_dir, 'Cards', 'par iteratorclass=param_card_mod.ParamCardIterator, summaryorder=lambda obj: lambda:None, check_card=lambda obj: CommonRunCmd.static_check_param_card, + run_card_scan=False, + run_card_input= lambda obj: pjoin(obj.me_dir, 'Cards', 'run_card.dat'), + run_card_iteratorclass=banner_mod.RunCardIterator, ): """ This is a decorator for customizing/using scan over the param_card (or technically other) This should be use like this: @@ -7678,7 +7792,60 @@ def __enter__(self): def __exit__(self, ctype, value, traceback ): self.iterator.write(self.path) - def decorator(original_fct): + def scan_over_run_card(original_fct, obj, *args, **opts): + + if isinstance(input_path, str): + card_path = run_card_input + else: + card_path = run_card_input(obj) + + run_card_iterator = run_card_iteratorclass(card_path) + orig_card = copy.deepcopy(run_card_iterator.run_card) + if not run_card_iterator.run_card.scan_set: + return original_fct(obj, *args, **opts) + + + with restore_iterator(orig_card, card_path): + # this with statement ensure that the original card is restore + # whatever happens inside those block + + if not hasattr(obj, 'allow_notification_center'): + obj.allow_notification_center = False + with misc.TMP_variable(obj, 'allow_notification_center', False): + orig_name = get_run_name(obj) + if not orig_name and args[1]: + orig_name = args[1][0] + args = (args[0], args[1][1:]) + #orig_name = "scan_%s" % len(obj.results) + + try: + os.mkdir(pjoin(obj.me_dir, 'Events', orig_name)) + except Exception: + pass + next_name = orig_name + "_00" + + for i,card in enumerate(run_card_iterator): + card.write(card_path) + # still have to check for the auto-wdith + #if i !=0: + next_name = run_card_iterator.get_next_name(next_name) + set_run_name(obj)(next_name) + try: + original_fct(obj, *args, **opts) + except ignoreerror as error: + run_card_iterator.store_entry(next_name, {'exception': error}) + else: + run_card_iterator.store_entry(next_name, store_for_scan(obj)(), run_card_path=card_path) + + #param_card_iterator.write(card_path) #-> this is done by the with statement + name = misc.get_scan_name(orig_name, next_name) + path = result_path(obj) % name + logger.info("write scan results in %s" % path ,'$MG:BOLD') + order = 
summaryorder(obj)() + run_card_iterator.write_summary(path, order=order) + + + def decorator(original_fct): def new_fct(obj, *args, **opts): if isinstance(input_path, str): @@ -7702,8 +7869,13 @@ def new_fct(obj, *args, **opts): if not param_card_iterator: #first run of the function - original_fct(obj, *args, **opts) - return + if run_card_scan: + scan_over_run_card(original_fct, obj, *args, **opts) + return + else: + #first run of the function + original_fct(obj, *args, **opts) + return with restore_iterator(param_card_iterator, card_path): # this with statement ensure that the original card is restore diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/extended_cmd.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/extended_cmd.py index 2f37070580..789976beee 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/extended_cmd.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/extended_cmd.py @@ -624,12 +624,12 @@ def complete(self, text, state): compfunc = self.completenames # correct wrong splittion with '\ ' - if line and begidx > 2 and line[begidx-2:begidx] == '\ ': + if line and begidx > 2 and line[begidx-2:begidx] == r'\ ': Ntext = line.split(os.path.sep)[-1] - self.completion_prefix = Ntext.rsplit('\ ', 1)[0] + '\ ' + self.completion_prefix = Ntext.rsplit(r'\ ', 1)[0] + r'\ ' to_rm = len(self.completion_prefix) - 1 Nbegidx = len(line.rsplit(os.path.sep, 1)[0]) + 1 - data = compfunc(Ntext.replace('\ ', ' '), line, Nbegidx, endidx) + data = compfunc(Ntext.replace(r'\ ', ' '), line, Nbegidx, endidx) self.completion_matches = [p[to_rm:] for p in data if len(p)>to_rm] # correct wrong splitting with '-'/"=" @@ -742,7 +742,7 @@ def path_completion(text, base_dir = None, only_dirs = False, completion += [prefix + f for f in ['.'+os.path.sep, '..'+os.path.sep] if \ f.startswith(text) and not prefix.startswith('.')] - completion = [a.replace(' ','\ ') for a in completion] + completion = [a.replace(' ',r'\ ') for a in completion] return completion @@ -1253,7 +1253,7 @@ def check_answer_in_input_file(self, question_instance, default, path=False, lin return possibility[0] if '=' in line and ' ' in line.strip(): leninit = len(line) - line,n = re.subn('\s*=\s*','=', line) + line,n = re.subn(r'\s*=\s*','=', line) if n and len(line) != leninit: return self.check_answer_in_input_file(question_instance, default, path=path, line=line) @@ -1311,7 +1311,7 @@ def nice_error_handling(self, error, line): if os.path.exists(self.debug_output): os.remove(self.debug_output) try: - super(Cmd,self).onecmd('history %s' % self.debug_output.replace(' ', '\ ')) + super(Cmd,self).onecmd('history %s' % self.debug_output.replace(' ', r'\ ')) except Exception as error: logger.error(error) @@ -2001,6 +2001,7 @@ def write_configuration(self, filepath, basefile, basedir, to_keep): text = "" has_mg5_path = False # Use local configuration => Need to update the path + already_written = set() for line in open(basefile): if '=' in line: data, value = line.split('=',1) @@ -2018,9 +2019,12 @@ def write_configuration(self, filepath, basefile, basedir, to_keep): comment = '' if key in to_keep: value = str(to_keep[key]) - else: + elif line not in already_written: + already_written.add(line) text += line continue + else: + continue if key == 'mg5_path': has_mg5_path = True try: @@ -2032,14 +2036,20 @@ def write_configuration(self, filepath, basefile, basedir, to_keep): # check if absolute path if not os.path.isabs(value): value = os.path.realpath(os.path.join(basedir, value)) - text += '%s = %s # %s \n' % (key, value, comment) + new_line = '%s = 
%s # %s \n' % (key, value, comment) + if new_line not in already_written: + text += new_line + already_written.add(new_line) for key in to_write: if key in to_keep: - text += '%s = %s \n' % (key, to_keep[key]) + new_line = '%s = %s \n' % (key, to_keep[key]) + if new_line not in already_written: + text += new_line if not MADEVENT and not has_mg5_path: - text += """\n# MG5 MAIN DIRECTORY\n""" - text += "mg5_path = %s\n" % MG5DIR + if "mg5_path = %s\n" % MG5DIR not in already_written: + text += """\n# MG5 MAIN DIRECTORY\n""" + text += "mg5_path = %s\n" % MG5DIR writer = open(filepath,'w') writer.write(text) @@ -2190,7 +2200,7 @@ def onecmd(self, line, **opt): raise def reask(self, reprint_opt=True): - pat = re.compile('\[(\d*)s to answer\]') + pat = re.compile(r'\[(\d*)s to answer\]') prev_timer = signal.alarm(0) # avoid timer if any if prev_timer: @@ -2991,7 +3001,7 @@ def question_formatting(self, nb_col = 80, lpotential_switch=0, lnb_key=0, key=None): - """should return four lines: + r"""should return four lines: 1. The upper band (typically /========\ 2. The lower band (typically \========/ 3. The line without conflict | %(nb)2d. %(descrip)-20s %(name)5s = %(switch)-10s | @@ -3239,13 +3249,13 @@ def create_question(self, help_text=True): data_to_format['conflict_switch'] = self.color_for_value(key,self.inconsistent_keys[key], consistency=False) if hidden_line: - f2 = re.sub('%(\((?:name|descrip|add_info)\)-?)(\d+)s', + f2 = re.sub(r'%(\((?:name|descrip|add_info)\)-?)(\d+)s', lambda x: '%%%s%ds' % (x.group(1),int(x.group(2))+9), f2) text.append(f2 % data_to_format) elif hidden_line: if not f3: - f3 = re.sub('%(\((?:name|descrip|add_info)\)-?)(\d+)s', + f3 = re.sub(r'%(\((?:name|descrip|add_info)\)-?)(\d+)s', lambda x: '%%%s%ds' % (x.group(1),int(x.group(2))+9), f1) text.append(f3 % data_to_format) diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/file_writers.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/file_writers.py index 41bff05276..526756129f 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/file_writers.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/file_writers.py @@ -36,10 +36,10 @@ class FileWriter(io.FileIO): supported_preprocessor_commands = ['if'] preprocessor_command_re=re.compile( - "\s*(?P%s)\s*\(\s*(?P.*)\s*\)\s*{\s*"\ + r"\s*(?P%s)\s*\(\s*(?P.*)\s*\)\s*{\s*"\ %('|'.join(supported_preprocessor_commands))) preprocessor_endif_re=re.compile(\ - "\s*}\s*(?Pelse)?\s*(\((?P.*)\))?\s*(?P{)?\s*") + r"\s*}\s*(?Pelse)?\s*(\((?P.*)\))?\s*(?P{)?\s*") class FileWriterError(IOError): """Exception raised if an error occurs in the definition @@ -191,15 +191,15 @@ class FortranWriterError(FileWriter.FileWriterError): pass # Parameters defining the output of the Fortran writer - keyword_pairs = {'^if.+then\s*$': ('^endif', 2), - '^type(?!\s*\()\s*.+\s*$': ('^endtype', 2), - '^do(?!\s+\d+)\s+': ('^enddo\s*$', 2), - '^subroutine': ('^end\s*$', 0), - '^module': ('^end\s*$', 0), - 'function': ('^end\s*$', 0)} - single_indents = {'^else\s*$':-2, - '^else\s*if.+then\s*$':-2} - number_re = re.compile('^(?P\d+)\s+(?P.*)') + keyword_pairs = {r'^if.+then\s*$': ('^endif', 2), + r'^type(?!\s*\()\s*.+\s*$': ('^endtype', 2), + r'^do(?!\s+\d+)\s+': (r'^enddo\s*$', 2), + '^subroutine': (r'^end\s*$', 0), + '^module': (r'^end\s*$', 0), + 'function': (r'^end\s*$', 0)} + single_indents = {r'^else\s*$':-2, + r'^else\s*if.+then\s*$':-2} + number_re = re.compile(r'^(?P\d+)\s+(?P.*)') line_cont_char = '$' comment_char = 'c' uniformcase = True #force everyting to be lower/upper case @@ -212,7 +212,7 @@ class 
FortranWriterError(FileWriter.FileWriterError): # Private variables __indent = 0 __keyword_list = [] - __comment_pattern = re.compile(r"^(\s*#|c$|(c\s+([^=]|$))|cf2py|c\-\-|c\*\*|!)", re.IGNORECASE) + __comment_pattern = re.compile(r"^(\s*#|c\$|c$|(c\s+([^=]|$))|cf2py|c\-\-|c\*\*|\s*!|!\$)", re.IGNORECASE) __continuation_line = re.compile(r"(?: )[$&]") def write_line(self, line): @@ -424,26 +424,20 @@ def count_number_of_quotes(self, line): i = i + 1 return len(splitline)-1 - def remove_routine(self, text, fct_names, formatting=True): - """write the incoming text but fully removing the associate routine/function - text can be a path to a file, an iterator, a string - fct_names should be a list of functions to remove + @staticmethod + def get_routine(text, fct_names, call_back=None): + """ + get the fortran function from a fortran file """ - f77_type = ['real*8', 'integer', 'double precision', 'logical'] - pattern = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ + pattern = re.compile(r'^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ % {'type':'|'.join(f77_type)}, re.I) - + + if isinstance(text, str): + text = text.split('\n') + + to_write=False removed = [] - if isinstance(text, str): - if '\n' in text: - text = text.split('\n') - else: - text = open(text) - if isinstance(fct_names, str): - fct_names = [fct_names] - - to_write=True for line in text: fct = pattern.findall(line) if fct: @@ -451,22 +445,38 @@ def remove_routine(self, text, fct_names, formatting=True): to_write = False else: to_write = True - if to_write: - if formatting: - if line.endswith('\n'): - line = line[:-1] - self.writelines(line) - else: - if not line.endswith('\n'): - line = '%s\n' % line - super(FileWriter,self).writelines(line) + if call_back: + call_back(line) else: removed.append(line) - + return removed + + def remove_routine(self, text, fct_names, formatting=True): + """write the incoming text but fully removing the associate routine/function + text can be a path to a file, an iterator, a string + fct_names should be a list of functions to remove + """ + + def call_back(line): + if formatting: + if line.endswith('\n'): + line = line[:-1] + self.writelines(line) + else: + if not line.endswith('\n'): + line = '%s\n' % line + super(FileWriter,self).writelines(line) + + return self.get_routine(text, fct_names, call_back) + +class FortranWriter90(FortranWriter): + + comment_char = ' !' 
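# --- editor's sketch (not part of the generated diff) ------------------------
# The remove_routine/get_routine refactor above splits one loop into a
# reusable scanner: get_routine() walks the Fortran source, flips a flag at
# each SUBROUTINE/function header, collects the routines whose names match,
# and hands every other line to an optional call_back. A condensed,
# self-contained version of that control flow (same header regex as the
# patch; semantics abridged for illustration):
import re

F77_TYPES = ['real*8', 'integer', 'double precision', 'logical']
HEADER = re.compile(r'^\s+(?:SUBROUTINE|(?:%s)\s+function)\s+([a-zA-Z]\w*)'
                    % '|'.join(F77_TYPES), re.I)

def get_routine(text, fct_names, call_back=None):
    if isinstance(text, str):
        text = text.split('\n')
    matched, kept = False, []
    for line in text:
        fct = HEADER.findall(line)
        if fct:
            matched = fct[0].lower() in fct_names   # a new routine starts here
        if matched:
            kept.append(line)                       # requested routine
        elif call_back:
            call_back(line)                         # everything else
    return kept

src = """      subroutine foo(x)
      end
      integer function bar(y)
      end"""
print(get_routine(src, ['bar']))   # -> the two lines of function bar
# ------------------------------------------------------------------------------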
+ #=============================================================================== # CPPWriter #=============================================================================== @@ -497,50 +507,50 @@ class CPPWriterError(FileWriter.FileWriterError): '^private': standard_indent, '^protected': standard_indent} - spacing_patterns = [ - ('\s*\"\s*}', '\"'), - ('\s*,\s*', ', '), - ('\s*-\s*', ' - '), - ('([{(,=])\s*-\s*', '\g<1> -'), - ('(return)\s*-\s*', '\g<1> -'), - ('\s*\+\s*', ' + '), - ('([{(,=])\s*\+\s*', '\g<1> +'), - ('\(\s*', '('), - ('\s*\)', ')'), - ('\{\s*', '{'), - ('\s*\}', '}'), - ('\s*=\s*', ' = '), - ('\s*>\s*', ' > '), - ('\s*<\s*', ' < '), - ('\s*!\s*', ' !'), - ('\s*/\s*', '/'), - ('(?\s+>\s*', ' >> '), - ('<\s*double\s*>>\s*', ' > '), - ('\s*<\s+<\s*', ' << '), - ('\s*-\s+>\s*', '->'), - ('\s*=\s+=\s*', ' == '), - ('\s*!\s+=\s*', ' != '), - ('\s*>\s+=\s*', ' >= '), - ('\s*<\s+=\s*', ' <= '), - ('\s*&&\s*', ' && '), - ('\s*\|\|\s*', ' || '), - ('\s*{\s*}', ' {}'), - ('\s*;\s*', '; '), - (';\s*\}', ';}'), - (';\s*$}', ';'), - ('\s*<\s*([a-zA-Z0-9]+?)\s*>', '<\g<1>>'), - ('^#include\s*<\s*(.*?)\s*>', '#include <\g<1>>'), - ('(\d+\.{0,1}\d*|\.\d+)\s*[eE]\s*([+-]{0,1})\s*(\d+)', - '\g<1>e\g<2>\g<3>'), - ('\s+',' '), - ('^\s*#','#')] + spacing_patterns = [(r'\s*\"\s*}', '\"'), + (r'\s*,\s*', ', '), + (r'\s*-\s*', ' - '), + (r'([{(,=])\s*-\s*', r'\g<1> -'), + (r'(return)\s*-\s*', r'\g<1> -'), + (r'\s*\+\s*', ' + '), + (r'([{(,=])\s*\+\s*', r'\g<1> +'), + (r'\(\s*', '('), + (r'\s*\)', ')'), + (r'\{\s*', '{'), + (r'\s*\}', '}'), + (r'\s*=\s*', ' = '), + (r'\s*>\s*', ' > '), + (r'\s*<\s*', ' < '), + (r'\s*!\s*', ' !'), + (r'\s*/\s*', '/'), + (r'\s*\*\s*', ' * '), + (r'\s*-\s+-\s*', '-- '), + (r'\s*\+\s+\+\s*', '++ '), + (r'\s*-\s+=\s*', ' -= '), + (r'\s*\+\s+=\s*', ' += '), + (r'\s*\*\s+=\s*', ' *= '), + (r'\s*/=\s*', ' /= '), + (r'\s*>\s+>\s*', ' >> '), + (r'<\s*double\s*>>\s*', ' > '), + (r'\s*<\s+<\s*', ' << '), + (r'\s*-\s+>\s*', '->'), + (r'\s*=\s+=\s*', ' == '), + (r'\s*!\s+=\s*', ' != '), + (r'\s*>\s+=\s*', ' >= '), + (r'\s*<\s+=\s*', ' <= '), + (r'\s*&&\s*', ' && '), + (r'\s*\|\|\s*', ' || '), + (r'\s*{\s*}', ' {}'), + (r'\s*;\s*', '; '), + (r';\s*\}', ';}'), + (r';\s*$}', ';'), + (r'\s*<\s*([a-zA-Z0-9]+?)\s*>', r'<\g<1>>'), + (r'^#include\s*<\s*(.*?)\s*>', r'#include <\g<1>>'), + (r'(\d+\.{0,1}\d*|\.\d+)\s*[eE]\s*([+-]{0,1})\s*(\d+)', + r'\g<1>e\g<2>\g<3>'), + (r'\s+',' '), + (r'^\s*#','#')] + spacing_re = dict([(key[0], re.compile(key[0])) for key in \ spacing_patterns]) diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/gen_crossxhtml.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/gen_crossxhtml.py index d58ec573bc..681bf9d09b 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/gen_crossxhtml.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/gen_crossxhtml.py @@ -648,7 +648,7 @@ def recreate(self, banner): if run_card['ickkw'] != 0: #parse the file to have back the information pythia_log = misc.BackRead(pjoin(path, '%s_pythia.log' % tag)) - pythiare = re.compile("\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") + pythiare = re.compile(r"\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") for line in pythia_log: info = pythiare.search(line) if not info: @@ -1623,7 +1623,7 @@ def get_html(self, runresults): elif self.debug: text = str(self.debug).replace('. ','.
<br>') if 'http' in text: - pat = re.compile('(http[\S]*)') + pat = re.compile(r'(http[\S]*)') text = pat.sub(r'<a href=\g<1> target=_blank> here </a>', text) debug = '<br> %s <br> %s <br>
' % \ (self.debug.__class__.__name__, text) diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/gen_ximprove.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/gen_ximprove.py index c86da36a05..415ecc9de0 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/gen_ximprove.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/gen_ximprove.py @@ -158,8 +158,11 @@ def get_helicity(self, to_submit=True, clean=True): (stdout, _) = p.communicate(''.encode()) stdout = stdout.decode('ascii',errors='ignore') if stdout: - nb_channel = max([math.floor(float(d)) for d in stdout.split()]) + lines = stdout.strip().split('\n') + nb_channel = max([math.floor(float(d)) for d in lines[-1].split()]) else: + if os.path.exists(pjoin(self.me_dir, 'error')): + os.remove(pjoin(self.me_dir, 'error')) for matrix_file in misc.glob('matrix*orig.f', Pdir): files.cp(matrix_file, matrix_file.replace('orig','optim')) P_zero_result.append(Pdir) @@ -297,12 +300,14 @@ def get_helicity(self, to_submit=True, clean=True): bad_amps_perhel = [] if __debug__: mtext = open(matrix_file).read() - nb_amp = int(re.findall('PARAMETER \(NGRAPHS=(\d+)\)', mtext)[0]) - logger.debug('nb_hel: %s zero amp: %s bad_amps_hel: %s/%s', len(good_hels),len(bad_amps),len(bad_amps_perhel), len(good_hels)*nb_amp ) + nb_amp = int(re.findall(r'PARAMETER \(NGRAPHS=(\d+)\)', mtext)[0]) + logger.debug('(%s) nb_hel: %s zero amp: %s bad_amps_hel: %s/%s', split_file[-1], len(good_hels),len(bad_amps),len(bad_amps_perhel), len(good_hels)*nb_amp ) if len(good_hels) == 1: files.cp(matrix_file, matrix_file.replace('orig','optim')) continue # avoid optimization if onlye one helicity - recycler = hel_recycle.HelicityRecycler(good_hels, bad_amps, bad_amps_perhel) + + gauge = self.cmd.proc_characteristics['gauge'] + recycler = hel_recycle.HelicityRecycler(good_hels, bad_amps, bad_amps_perhel, gauge=gauge) # In case of bugs you can play around with these: recycler.hel_filt = self.run_card['hel_filtering'] recycler.amp_splt = self.run_card['hel_splitamp'] @@ -1299,7 +1304,7 @@ def get_job_for_event(self): 'script_name': 'unknown', 'directory': C.name, # need to be change for splitted job 'P_dir': C.parent_name, - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # needed for RO gridpack 'offset': 1, # need to be change for splitted job 'nevents': nevents, 'maxiter': self.max_iter, @@ -1387,7 +1392,7 @@ def create_ajob(self, template, jobs, write_dir=None): break info = jobs[j] info['script_name'] = 'ajob%i' % script_number - info['keeplog'] = 'false' + info['keeplog'] = 'false' if self.run_card['keep_log'] != 'debug' else 'true' if "base_directory" not in info: info["base_directory"] = "./" fsock.write(template_text % info) @@ -1456,7 +1461,7 @@ def get_job_for_precision(self): 'script_name': 'unknown', 'directory': C.name, # need to be change for splitted job 'P_dir': C.parent_name, - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # used for RO gridpack 'offset': 1, # need to be change for splitted job 'nevents': nevents, 'maxiter': self.max_iter, @@ -1916,7 +1921,7 @@ def get_job_for_event(self): 'directory': C.name, # need to be change for splitted job 'P_dir': os.path.basename(C.parent_name), 'offset': 1, # need to be change for splitted job - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # use for RO gridpack 'nevents': nevents, 
#int(nevents*self.gen_events_security)+1, 'maxiter': self.max_iter, 'miniter': self.min_iter, diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/hel_recycle.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/hel_recycle.py index dfa45d5d20..6c86611f68 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/hel_recycle.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/hel_recycle.py @@ -383,7 +383,7 @@ def get_number(cls, *args): class HelicityRecycler(): '''Class for recycling helicity''' - def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[]): + def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[], gauge='U'): External.good_hel = [] External.nhel_lines = '' @@ -427,6 +427,7 @@ def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[]): self.all_hel = [] self.hel_filt = True + self.gauge = gauge def set_input(self, file): if 'born_matrix' in file: @@ -612,7 +613,7 @@ def unfold_helicities(self, line, nature): def apply_amps(self, line, new_objs): if self.amp_splt: - return split_amps(line, new_objs) + return split_amps(line, new_objs, gauge=self.gauge) else: return apply_args(line, [i.args for i in new_objs]) @@ -785,7 +786,7 @@ def apply_args(old_line, all_the_args): return ''.join(new_lines) -def split_amps(line, new_amps): +def split_amps(line, new_amps, gauge): if not new_amps: return '' fct = line.split('(',1)[0].split('_0')[0] @@ -841,34 +842,31 @@ def split_amps(line, new_amps): spin = fct.split(None,1)[1][to_remove] lines.append('%sP1N_%s(%s)' % (fct, to_remove+1, ', '.join(args))) - hel, iamp = re.findall('AMP\((\d+),(\d+)\)', amp_result)[0] + hel, iamp = re.findall(r'AMP\((\d+),(\d+)\)', amp_result)[0] hel_calculated.append(hel) #lines.append(' %(result)s = TMP(3) * W(3,%(w)s) + TMP(4) * W(4,%(w)s)+' # % {'result': amp_result, 'w': windex}) #lines.append(' & TMP(5) * W(5,%(w)s)+TMP(6) * W(6,%(w)s)' # % {'result': amp_result, 'w': windex}) - if spin in "VF": - lines.append(""" call CombineAmp(%(nb)i, - & (/%(hel_list)s/), - & (/%(w_list)s/), - & TMP, W, AMP(1,%(iamp)s))""" % - {'nb': len(sub_amps), - 'hel_list': ','.join(hel_calculated), - 'w_list': ','.join(windices), - 'iamp': iamp - }) + if spin == "F" or ( spin == "V" and gauge !='FD'): + suffix = '' elif spin == "S": - lines.append(""" call CombineAmpS(%(nb)i, - &(/%(hel_list)s/), - & (/%(w_list)s/), - & TMP, W, AMP(1,%(iamp)s))""" % - {'nb': len(sub_amps), - 'hel_list': ','.join(hel_calculated), - 'w_list': ','.join(windices), - 'iamp': iamp - }) + suffix = 'S' + elif spin == "V" and gauge == "FD": + suffix = "FD" else: - raise Exception("split amp are not supported for spin2 and 3/2") + raise Exception("split amp not supported for spin2, 3/2") + + lines.append(""" call CombineAmp%(suffix)s(%(nb)i, + & (/%(hel_list)s/), + & (/%(w_list)s/), + & TMP, W, AMP(1,%(iamp)s))""" % {'suffix':suffix, + 'nb': len(sub_amps), + 'hel_list': ','.join(hel_calculated), + 'w_list': ','.join(windices), + 'iamp': iamp + }) + #lines.append('') return '\n'.join(lines) diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/histograms.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/histograms.py index 98f22c4c3a..9931127f66 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/histograms.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/histograms.py @@ -632,34 +632,34 @@ class HwU(Histogram): # than necessary because the HwU standard allows for spaces from within # the name of a weight weight_header_re = re.compile( - '&\s*(?P(\S|(\s(?!\s*(&|$))))+)(\s(?!(&|$)))*') + r'&\s*(?P(\S|(\s(?!\s*(&|$))))+)(\s(?!(&|$)))*') # 
================================ # Histo weight specification RE's # ================================ # The start of a plot - histo_start_re = re.compile('^\s*<histogram>\s*(?P<n_bins>\d+)\s*"\s*'+ - '(?P<histo_name>(\S|(\s(?!\s*")))+)\s*"\s*$') + histo_start_re = re.compile(r'^\s*<histogram>\s*(?P<n_bins>\d+)\s*"\s*'+ + r'(?P<histo_name>(\S|(\s(?!\s*")))+)\s*"\s*$') # A given weight specifier - a_float_re = '[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' - histo_bin_weight_re = re.compile('(?P<weight>%s|NaN)'%a_float_re,re.IGNORECASE) - a_int_re = '[\+|-]?\d+' + a_float_re = r'[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' + histo_bin_weight_re = re.compile(r'(?P<weight>%s|NaN)'%a_float_re,re.IGNORECASE) + a_int_re = r'[\+|-]?\d+' # The end of a plot histo_end_re = re.compile(r'^\s*<\\histogram>\s*$') # A scale type of weight - weight_label_scale = re.compile('^\s*mur\s*=\s*(?P<mur>%s)'%a_float_re+\ - '\s*muf\s*=\s*(?P<muf>%s)\s*$'%a_float_re,re.IGNORECASE) - weight_label_PDF = re.compile('^\s*PDF\s*=\s*(?P<PDF>\d+)\s*$') - weight_label_PDF_XML = re.compile('^\s*pdfset\s*=\s*(?P<PDF>\d+)\s*$') - weight_label_TMS = re.compile('^\s*TMS\s*=\s*(?P<TMS>%s)\s*$'%a_float_re) - weight_label_alpsfact = re.compile('^\s*alpsfact\s*=\s*(?P<alpsfact>%s)\s*$'%a_float_re, + weight_label_scale = re.compile(r'^\s*mur\s*=\s*(?P<mur>%s)'%a_float_re+\ + r'\s*muf\s*=\s*(?P<muf>%s)\s*$'%a_float_re,re.IGNORECASE) + weight_label_PDF = re.compile(r'^\s*PDF\s*=\s*(?P<PDF>\d+)\s*$') + weight_label_PDF_XML = re.compile(r'^\s*pdfset\s*=\s*(?P<PDF>\d+)\s*$') + weight_label_TMS = re.compile(r'^\s*TMS\s*=\s*(?P<TMS>%s)\s*$'%a_float_re) + weight_label_alpsfact = re.compile(r'^\s*alpsfact\s*=\s*(?P<alpsfact>%s)\s*$'%a_float_re, re.IGNORECASE) - weight_label_scale_adv = re.compile('^\s*dyn\s*=\s*(?P<dyn>%s)'%a_int_re+\ - '\s*mur\s*=\s*(?P<mur>%s)'%a_float_re+\ - '\s*muf\s*=\s*(?P<muf>%s)\s*$'%a_float_re,re.IGNORECASE) - weight_label_PDF_adv = re.compile('^\s*PDF\s*=\s*(?P<PDF>\d+)\s+(?P<name>\S+)\s*$') + weight_label_scale_adv = re.compile(r'^\s*dyn\s*=\s*(?P<dyn>%s)'%a_int_re+\ + r'\s*mur\s*=\s*(?P<mur>%s)'%a_float_re+\ + r'\s*muf\s*=\s*(?P<muf>%s)\s*$'%a_float_re,re.IGNORECASE) + weight_label_PDF_adv = re.compile(r'^\s*PDF\s*=\s*(?P<PDF>\d+)\s+(?P<name>\S+)\s*$') class ParseError(MadGraph5Error): @@ -926,7 +926,7 @@ def get_HwU_source(self, print_header=True): res.append(' '.join('%+16.7e'%wgt for wgt in list(bin.boundaries))) res[-1] += ' '.join('%+16.7e'%bin.wgts[key] for key in self.bins.weight_labels if key not in ['central','stat_error']) - res.append('<\histogram>') + res.append(r'<\histogram>') return res def output(self, path=None, format='HwU', print_header=True): @@ -1149,6 +1149,8 @@ def parse_one_histo_from_stream(self, stream, all_weight_header, boundaries = [0.0,0.0] for j, weight in \ enumerate(HwU.histo_bin_weight_re.finditer(line_bin)): + if (j == len(weight_header)): + continue if j == len(all_weight_header): raise HwU.ParseError("There is more bin weights"+\ " specified than expected (%i)"%len(weight_header)) @@ -1803,7 +1805,7 @@ def parse_histos_from_PY8_XML_stream(self, stream, run_id=None, # Filter empty weights coming from the split weight_label_list = [wgt.strip() for wgt in str(selected_run_node.getAttribute('header')).split(';') if - not re.match('^\s*$',wgt)] + not re.match(r'^\s*$',wgt)] ordered_weight_label_list = [w for w in weight_label_list if w not\ in ['xmin','xmax']] # Remove potential repetition of identical weight labels @@ -1827,7 +1829,7 @@ def parse_histos_from_PY8_XML_stream(self, stream, run_id=None, all_weights = [] for wgt_position, wgt_label in \ enumerate(str(selected_run_node.getAttribute('header')).split(';')): - if not
re.match(r'^\s*$',wgt_label) is None: continue all_weights.append({'POSITION':wgt_position}) for wgt_item in wgt_label.strip().split('_'): @@ -2714,7 +2716,7 @@ def ratio_no_correlations(wgtsA, wgtsB): # First the global gnuplot header for this histogram group global_header =\ -""" +r""" ################################################################################ ### Rendering of the plot titled '%(title)s' ################################################################################ @@ -2862,9 +2864,9 @@ def ratio_no_correlations(wgtsA, wgtsB): major_title = ', '.join(major_title) if not mu[0] in ['none',None]: - major_title += ', dynamical\_scale\_choice=%s'%mu[0] + major_title += r', dynamical\_scale\_choice=%s'%mu[0] if not pdf[0] in ['none',None]: - major_title += ', PDF=%s'%pdf[0].replace('_','\_') + major_title += ', PDF=%s'%pdf[0].replace('_',r'\_') # Do not show uncertainties for individual jet samples (unless first # or specified explicitely and uniquely) @@ -2937,7 +2939,7 @@ def ratio_no_correlations(wgtsA, wgtsB): plot_lines.append( "'%s' index %d using (($1+$2)/2):%d ls %d title '%s'"\ %(HwU_name,block_position+i,mu_var+3,color_index,\ -'%s dynamical\_scale\_choice=%s' % (title,mu[j]))) +r'%s dynamical\_scale\_choice=%s' % (title,mu[j]))) # And now PDF_variation if available if not PDF_var_pos is None: for j,PDF_var in enumerate(PDF_var_pos): @@ -2947,7 +2949,7 @@ def ratio_no_correlations(wgtsA, wgtsB): plot_lines.append( "'%s' index %d using (($1+$2)/2):%d ls %d title '%s'"\ %(HwU_name,block_position+i,PDF_var+3,color_index,\ -'%s PDF=%s' % (title,pdf[j].replace('_','\_')))) +'%s PDF=%s' % (title,pdf[j].replace('_',r'\_')))) # Now add the uncertainty lines, those not using a band so that they # are not covered by those using a band after we reverse plo_lines diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/launch_plugin.py index 7b10bedcef..9ec09eb71d 100644 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/launch_plugin.py @@ -98,6 +98,7 @@ def default_setup(self): self['vector_size'] = 16 # already setup in default class (just change value) self['aloha_flag'] = '--fast-math' self['matrix_flag'] = '-O3' + self['limhel'] = 0 self.display_block.append('simd') self.display_block.append('psoptim') diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/lhe_parser.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/lhe_parser.py index eee9ba4522..f6e47956cd 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/lhe_parser.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/lhe_parser.py @@ -7,6 +7,7 @@ import numbers import math import time +import copy import os import shutil import sys @@ -101,7 +102,7 @@ def __init__(self, line=None, event=None): self.rwgt = 0 return - + self.event = event if event is not None: self.event_id = len(event) #not yet in the event @@ -1066,6 +1067,8 @@ def define_init_banner(self, wgt, lha_strategy, proc_charac=None): #special case for 1>N init_information = run_card.get_banner_init_information() event = next(self) + if not len(event): #if parse-momenta was false we have to parse the first event + event = Event(str(event)) init_information["idbmup1"] = event[0].pdg init_information["ebmup1"] = event[0].mass init_information["idbmup2"] = 0 @@ -1149,6 +1152,7 @@ def initialize_unweighting(self, getwgt, trunc_error): nb_keep = max(20, int(nb_event*trunc_error*15)) new_wgt = new_wgt[-nb_keep:] if nb_event == 0: + misc.sprint(i,f) raise Exception 
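+            # (added annotation) misc.sprint(i, f) is a debug print: it records
+            # which input file contributed zero events just before the bare
+            # Exception aborts the unweighting initialization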
# store the information self.initial_nb_events[i] = nb_event @@ -1203,7 +1207,6 @@ def unweight(self, outputpath, get_wgt, **opts): (stop to write event when target is reached) """ - if isinstance(get_wgt, (str,six.text_type)): unwgt_name =get_wgt def get_wgt_multi(event): @@ -1483,12 +1486,12 @@ def reorder_mother_child(self): particle.mother2 -= 1 # re-call the function for the next potential change return self.reorder_mother_child() - - - - - + + + + + def parse_reweight(self): """Parse the re-weight information in order to return a dictionary {key: value}. If no group is define group should be '' """ @@ -1522,6 +1525,37 @@ def parse_nlo_weight(self, real_type=(1,11), threshold=None): threshold=threshold) return self.nloweight + def get_fks_pair(self, real_type=(1,11), threshold=None): + """ Gives the fks pair labels""" + start, stop = self.tag.find('<mgrwgt>'), self.tag.find('</mgrwgt>') + if start != -1 != stop: + text = self.tag[start+8:stop] + all_line = text.split('\n') + text = text.lower().replace('d','e') + all_line = text.split('\n') + for line in all_line: + data = line.split() + if len(data)>16: + wgt = OneNLOWeight(line, real_type=real_type) + return wgt.to_merge_pdg,wgt.nexternal + + def get_born_momenta(self,real_type=(1,11), threshold=None): + """ Gets the underlying n+1 body kinematics""" + start, stop = self.tag.find('<mgrwgt>'), self.tag.find('</mgrwgt>') + if start != -1 != stop: + text = self.tag[start+8:stop] + text = text.lower().replace('d','e') + all_line = text.split('\n') + for line in all_line: + data = line.split() + if len(data)>16: + wgt = OneNLOWeight(line, real_type=real_type) + nexternal = wgt.nexternal + real_momenta = all_line[2:2+nexternal] + return real_momenta + + + def rewrite_nlo_weight(self, wgt=None): """get the string associate to the weight""" @@ -1558,11 +1592,11 @@ def parse_lo_weight(self): return self.loweight if not hasattr(Event, 'loweight_pattern'): - Event.loweight_pattern = re.compile('''<rscale>\s*(?P<nqcd>\d+)\s+(?P<ren_scale>[\d.e+-]+)\s*</rscale>\s*\n\s* - <asrwt>\s*(?P<asrwt>[\s\d.+-e]+)\s*</asrwt>\s*\n\s* - <pdfrwt\s+beam=["']?(?P<beam1>1|2)["']?\>\s*(?P<pdf1>[\s\d.e+-]*)\s*</pdfrwt>\s*\n\s* - <pdfrwt\s+beam=["']?(?P<beam2>1|2)["']?\>\s*(?P<pdf2>[\s\d.e+-]*)\s*</pdfrwt>\s*\n\s* - <totfact>\s*(?P<totfact>[\d.e+-]*)\s*</totfact> + Event.loweight_pattern = re.compile('''<rscale>\\s*(?P<nqcd>\\d+)\\s+(?P<ren_scale>[\\d.e+-]+)\\s*</rscale>\\s*\n\\s* + <asrwt>\\s*(?P<asrwt>[\\s\\d.+-e]+)\\s*</asrwt>\\s*\n\\s* + <pdfrwt\\s+beam=["']?(?P<beam1>1|2)["']?\\>\\s*(?P<pdf1>[\\s\\d.e+-]*)\\s*</pdfrwt>\\s*\n\\s* + <pdfrwt\\s+beam=["']?(?P<beam2>1|2)["']?\\>\\s*(?P<pdf2>[\\s\\d.e+-]*)\\s*</pdfrwt>\\s*\n\\s* + <totfact>\\s*(?P<totfact>[\\d.e+-]*)\\s*</totfact> ''',re.X+re.I+re.M) start, stop = self.tag.find('<mgrwt>'), self.tag.find('</mgrwt>') @@ -1615,7 +1649,7 @@ def parse_matching_scale(self): self.matched_scale_data = [] - pattern = re.compile("<scales\s|</scales>") + pattern = re.compile(r"<scales\s|</scales>") data = re.split(pattern,self.tag) if len(data) == 1: return [] @@ -1623,7 +1657,7 @@ tmp = {} start,content, end = data self.tag = "%s%s" % (start, end) - pattern = re.compile("pt_clust_(\d*)=\"([\de+-.]*)\"") + pattern = re.compile("pt_clust_(\\d*)=\"([\\de+-.]*)\"") for id,value in pattern.findall(content): tmp[int(id)] = float(value) for i in range(1, len(self)+1): @@ -1647,7 +1681,7 @@ def parse_syscalc_info(self): return self.syscalc_data pattern = re.compile("<mgrwt>|</mgrwt>") - pattern2 = re.compile("<(?P<key>[\w]*)(?:\s*(\w*)=[\"'](.*)[\"']\s*|\s*)>(.*)</(?P=key)>") + pattern2 = re.compile("<(?P<key>[\\w]*)(?:\\s*(\\w*)=[\"'](.*)[\"']\\s*|\\s*)>(.*)</(?P=key)>") data = re.split(pattern,self.tag) if len(data) == 1: return [] @@ -1850,6 +1884,240 @@ def get_decay(self, pdg_code=0, event_id=None): return new_event + + def set_initial_mass_to_zero(self): + """set the masses of the initial particles to zero, by reshuffling the respective momenta + Works only in the
**partonic** com frame, so the event must be boosted to such frame + before calling the function + """ + + if not misc.equal(self[0].px, 0) or not misc.equal(self[1].px, 0) or \ + not misc.equal(self[0].py, 0) or not misc.equal(self[1].py, 0) or \ + not misc.equal(self[0].pz, - self[1].pz, zero_limit=False): + misc.sprint(self[0]) + misc.sprint(self[1]) + raise Exception('momenta should be in the partonic center of mass frame') + + self[0].mass = 0. + self[1].mass = 0. + tot_E=0. + for ip,part in enumerate(self): + if part.status == 1 : + tot_E += part.E + if (self[0].pz > 0. and self[1].pz < 0): + self[0].set_momentum(FourMomentum([tot_E/2., 0., 0., tot_E/2.])) + self[1].set_momentum(FourMomentum([tot_E/2., 0., 0., -tot_E/2.])) + elif (self[0].pz < 0. and self[1].pz > 0): + self[0].set_momentum(FourMomentum([tot_E/2., 0., 0., -tot_E/2.])) + self[1].set_momentum(FourMomentum([tot_E/2., 0., 0., tot_E/2.])) + else: + logger.critical('ERROR: two incoming partons not back.-to-back') + + def set_final_jet_mass_to_zero(self): + """set the final light particle masses to zero + """ + + for ip,part in enumerate(self): + if ((abs(part.pid) <= 5) or (abs(part.pid) == 11) or (abs(part.pid) == 12)) and (part.status == 1): + part.mass = 0. + E_1_new = math.sqrt(part.mass**2 + part.px**2 + part.py**2 + part.pz**2) + part.set_momentum(FourMomentum([E_1_new, part.px, part.py, part.pz])) + + + + def merge_particles_kinematics(self, i,j, moth): + """Map to an underlying n-body kinematics for two given + particles i,j to be merged and a resulting moth""" + """ note! kinematics (and id) mapping only! """ + + recoil = True + fks_type = False + + if recoil and not fks_type: + if (i == moth[0].get('number')-1): + fks_i = i + fks_j = j + elif (j == moth[0].get('number')-1): + fks_i = j + fks_j = i + to_remove = fks_j + + merge_i = self[fks_i] + merge_j = self[fks_j] + + i_4mom = FourMomentum(merge_i) + j_4mom = FourMomentum(merge_j) + if (fks_i <= 1): + sign1 = -1.0 + else: + sign1 = 1.0 + mother_4mom = i_4mom + sign1*j_4mom + + new_event = copy.deepcopy(self) + + self[fks_i].pid = moth[0]['id'] + self[fks_i].set_momentum(mother_4mom) + + if fks_i <= 1: # initial-state recoil + new_p = FourMomentum() + for ip,part in enumerate(self): + if (ip != fks_i and ip != fks_j and ip >= 2): + new_p += part + + if fks_i == 0: + self[1].set_momentum(new_p - FourMomentum(self[0])) + elif fks_i == 1: + self[0].set_momentum(new_p - FourMomentum(self[1])) + + pz_1_new = self.recoil_eq(self[0],self[1]) + pz_2_new = self[0].pz + self[1].pz - pz_1_new + E_1_new = math.sqrt(self[0].mass**2 + self[0].px**2 + self[0].py**2 + pz_1_new **2) + E_2_new = math.sqrt(self[1].mass**2 + self[1].px**2 + self[1].py**2 + pz_2_new **2) + self[0].set_momentum(FourMomentum([E_1_new,self[0].px,self[0].py,pz_1_new])) + self[1].set_momentum(FourMomentum([E_2_new,self[1].px,self[1].py,pz_2_new])) + self.pop(to_remove) + + if fks_i > 1: # final-state recoil + + # Re-scale the energy of fks_i to make it on-shell + for ip,part in enumerate(self): + if (ip == fks_i): + part.E = math.sqrt(part.mass**2 + part.px**2 + part.py**2 + part.pz**2) + new_p.E = part.E + + # Find the overall energy in the final state + new_p.E = 0.0 + for ip,part in enumerate(self): + if (ip != fks_j and ip >= 2): + new_p.E += part.E + + # Use one of the initial states to absorb the energy change in the final state + self[1].set_momentum(FourMomentum([new_p.E-self[0].E,self[1].px,self[1].py,self[1].pz])) + + # Change the initial state pz and E + pz_1_new = 
self.recoil_eq(self[0],self[1]) + pz_2_new = self[0].pz + self[1].pz - pz_1_new + E_1_new = math.sqrt(self[0].mass**2 + self[0].px**2 + self[0].py**2 + pz_1_new **2) + E_2_new = math.sqrt(self[1].mass**2 + self[1].px**2 + self[1].py**2 + pz_2_new **2) + self[0].set_momentum(FourMomentum([E_1_new,self[0].px,self[0].py,pz_1_new])) + self[1].set_momentum(FourMomentum([E_2_new,self[1].px,self[1].py,pz_2_new])) + self.pop(to_remove) + + elif fks_type and not recoil: + ## Do it in a more FKS-style + if (i == moth[0].get('number')-1): + fks_i = i + fks_j = j + elif (j == moth[0].get('number')-1): + fks_i = j + fks_j = i + to_remove = fks_j + new_event = copy.copy(event) + + if fks_i <= 1: # initial-state recoil + + # First boost to partonic CM frame + q = FourMomentum(self[0])+FourMomentum(self[1]) + for ip,part in enumerate(self): + vec = FourMomentum(part) + self[ip].set_momentum(vec.zboost(pboost=q)) + + k_tot = FourMomentum([self[0].E+self[1].E-self[fks_j].E,self[0].px+self[1].px-self[fks_j].px,\ + self[0].py+self[1].py-self[fks_j].py,self[0].pz+self[1].pz-self[fks_j].pz]) + + final = FourMomentum([0,0,0,0]) + for ip,part in enumerate(self): + vec = FourMomentum([part.E,part.px,part.py,part.pz]) + if (ip != fks_i and ip != fks_j and ip >= 2): + final = final + vec + + s = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz])**2 + ksi = self[fks_j].E/(math.sqrt(s)/2.0) + y = self[fks_j].pz/self[fks_j].E + + self[0].pz = self[0].pz * math.sqrt(1.0-ksi)*math.sqrt((2.0-ksi*(1.0+y))/((2.0-ksi*(1.0-y)))) + self[0].E = math.sqrt(self[0].mass**2 + self[0].pz**2) + self[1].pz = self[1].pz * math.sqrt(1.0-ksi)*math.sqrt((2.0-ksi*(1.0-y))/((2.0-ksi*(1.0+y)))) + self[1].E = math.sqrt(self[1].mass**2 + self[1].pz**2) + + final = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz]) + + k_tot_1 = k_tot.zboost(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + k_tot_2 = k_tot_1.pt_boost(pboost=FourMomentum([k_tot_1.E,k_tot_1.px,k_tot_1.py,k_tot_1.pz])) + k_tot_3 = k_tot_2.zboost_inv(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + + for ip,part in enumerate(self): + if (ip >= 2): + vec = FourMomentum([part.E,part.px,part.py,part.pz]) + vec2 = vec.zboost(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + vec3 = vec2.pt_boost(pboost=FourMomentum([k_tot_1.E,k_tot_1.px,k_tot_1.py,k_tot_1.pz])) + vec_new = vec3.zboost_inv(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + self[ip].set_momentum(FourMomentum([vec_new.E,vec_new.px,vec_new.py,vec_new.pz])) + + self.pop(to_remove) + + else: # final-state recoil + q = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz]) + + for ip,part in enumerate(self): + vec = FourMomentum([part.E,part.px,part.py,part.pz]) + self[ip].set_momentum(vec.zboost(pboost=q)) + + q = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz]) + + k = FourMomentum([self[fks_i].E+self[fks_j].E,self[fks_i].px+self[fks_j].px,\ + self[fks_i].py+self[fks_j].py,self[fks_i].pz+self[fks_j].pz]) + + k_rec = FourMomentum([0,0,0,0]) + for ip,part in enumerate(self): + if ip >= 2 and ip != fks_i and ip != fks_j: # add only final-states to the recoil and not the FKS pair + k_rec = k_rec + FourMomentum([part.E,part.px,part.py,part.pz]) + + k_mom = math.sqrt(k_rec.px**2 + k_rec.py**2 + k_rec.pz**2) + beta = (q**2 - (k_rec.E+k_mom)**2)/(q**2 + (k_rec.E+k_mom)**2) + 
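+                # (added annotation) with E_k = k_rec.E + k_mom, the boost
+                # velocity above is beta = (q**2 - E_k**2)/(q**2 + E_k**2), so
+                # boosting along k_rec rescales the recoil light-cone component
+                # E + |k| by a factor E_k/sqrt(q**2); the merged FKS parton
+                # then takes whatever four-momentum q - k_rec' is left over
+                # (see the fks_i branch below)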
for ip,part in enumerate(self): + if ip >= 2 and ip != fks_i and ip != fks_j: + vec = FourMomentum([self[ip].E,self[ip].px,self[ip].py,self[ip].pz]) + self[ip].set_momentum(vec.boost_beta(beta,k_rec)) + if ip == fks_i: + self[ip].set_momentum(q - k_rec.boost_beta(beta,k_rec)) + self.pop(to_remove) + else: + logger.info('Error in Sudakov Born mapping: no recoil scheme found!') + + def recoil_eq(self,part1, part2): + """ In general, solves the equation + E1 + E2 = K + p1 + p2 = c + E1^2 - p1^2 = a + E2^2 - p2^2 = b + and returns p1 + """ + thresh = 1e-6 + import random + a = part1.mass**2 + part1.px**2 + part1.py**2 + b = part2.mass**2 + part2.px**2 + part2.py**2 + c = part1.pz + part2.pz + K = part1.E + part2.E + K2 = K**2 + sol1 = (-a*c + b*c + c**3 - c*K2 - math.sqrt(K2*(a**2 + (b + c**2 - K2)**2 - 2*a*(b - c**2 + K2))))/(2*(c**2-K2)) + sol2 = (-a*c + b*c + c**3 - c*K2 + math.sqrt(K2*(a**2 + (b + c**2 - K2)**2 - 2*a*(b - c**2 + K2))))/(2*(c**2-K2)) + + if abs(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2) - (math.sqrt(a+sol2**2) + math.sqrt(b+(c-sol2)**2))) > thresh: + logger.critical('Error in recoil_eq solver 1') + logger.critical(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2)) + logger.critical(math.sqrt(a+sol2**2) + math.sqrt(b+(c-sol2)**2)) + if abs(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2) - K) > thresh: + logger.critical('Error in recoil_eq solver 2') + logger.critical(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2)) + logger.critical(K) + return sol1 + + def boost(self, filter=None): """modify the current event to boost it according to the current filter""" if filter is None: @@ -1861,7 +2129,7 @@ def boost(self, filter=None): if list(filter(p)): pboost += p else: - pboost = FourMomentum(pboost) + pboost = FourMomentum(filter) # change sign of three-component due to helas convention pboost.px *=-1 @@ -1877,7 +2145,7 @@ def check(self): """check various property of the events""" # check that relative error is under control - threshold = 1e-6 + threshold = 1e-4 #1. Check that the 4-momenta are conserved E, px, py, pz = 0,0,0,0 @@ -1920,7 +2188,50 @@ def check(self): self.check_color_structure() #3. check mass - + + def check_kinematics_only(self): + """check various property of the events - only kinematics""" + + # check that relative error is under control + threshold = 1e-3 + + #1. 
Check that the 4-momenta are conserved + E, px, py, pz = 0,0,0,0 + absE, abspx, abspy, abspz = 0,0,0,0 + for particle in self: + coeff = 1 + if particle.status == -1: + coeff = -1 + elif particle.status != 1: + continue + E += coeff * particle.E + absE += abs(particle.E) + px += coeff * particle.px + py += coeff * particle.py + pz += coeff * particle.pz + abspx += abs(particle.px) + abspy += abs(particle.py) + abspz += abs(particle.pz) + # check mass + fourmass = FourMomentum(particle).mass + + if particle.mass and (abs(particle.mass) - fourmass)/ abs(particle.mass) > threshold: + logger.critical(self) + raise Exception( "Do not have correct mass lhe: %s momentum: %s (error at %s" % (particle.mass, fourmass, (abs(particle.mass) - fourmass)/ abs(particle.mass))) + + if abs(E/absE) > threshold: + logger.critical(self) + raise Exception("Do not conserve Energy %s, %s" % (E/absE, E)) + if abs(px/abspx) > threshold: + logger.critical(self) + raise Exception("Do not conserve Px %s, %s" % (px/abspx, px)) + if abs(py/abspy) > threshold: + logger.critical(self) + raise Exception("Do not conserve Py %s, %s" % (py/abspy, py)) + if abs(pz/abspz) > threshold: + logger.critical(self) + raise Exception("Do not conserve Pz %s, %s" % (pz/abspz, pz)) + def assign_scale_line(self, line, convert=True): """read the line corresponding to global event line @@ -2764,7 +3075,7 @@ def zboost(self, pboost=None, E=0, pz=0): if isinstance(pboost, FourMomentum): E = pboost.E pz = pboost.pz - + #beta = pz/E gamma = E / math.sqrt(E**2-pz**2) gammabeta = pz / math.sqrt(E**2-pz**2) @@ -2778,6 +3089,74 @@ def zboost(self, pboost=None, E=0, pz=0): out.pz = 0 return out + def zboost_inv(self, pboost=None, E=0, pz=0): + """Both momenta should be in the same frame. + The boost perform correspond to the boost required to set pboost at + rest (only z boost applied). + """ + if isinstance(pboost, FourMomentum): + E = pboost.E + pz = pboost.pz + + #beta = pz/E + gamma = E / math.sqrt(E**2-pz**2) + gammabeta = pz / math.sqrt(E**2-pz**2) + + out = FourMomentum([gamma*self.E + gammabeta*self.pz, + self.px, + self.py, + gamma*self.pz + gammabeta*self.E]) + + if abs(out.pz) < 1e-6 * out.E: + out.pz = 0 + return out + + + def pt_boost(self, pboost=None, E=0, pz=0): + """Both momenta should be in the same frame. + The boost perform correspond to the boost required to set pboost at + rest (only pT boost applied). 
+ """ + + if isinstance(pboost, FourMomentum): + E = pboost.E + px = pboost.px + py = pboost.py + mass = math.sqrt(E**2 - px**2 - py**2) + + betax = px/E + betay = py/E + beta = math.sqrt(betax**2+betay**2) + gamma = 1 / math.sqrt(1.0-beta**2) + + out = FourMomentum([gamma*self.E - gamma*betax*self.px - gamma*betay*self.py, + -gamma*betax*self.E + (1.0 + (gamma-1.0)*betax**2/(beta**2))*self.px + (gamma-1.0)*betax*betay/(beta**2)*self.py, + -gamma*betay*self.E + ((gamma-1.0)*betax*betay/(beta**2))*self.px + (1.0+(gamma-1.0)*(betay**2)/(beta**2))*self.py, + self.pz]) + + if abs(out.px) < 1e-6 * out.E: + out.px = 0 + if abs(out.py) < 1e-6 * out.E: + out.py = 0 + return out + + def boost_beta(self,beta,mom): + """ Boost along the three-momentum of mom with a boost of size beta""" + + unit = mom * (1.0/math.sqrt(mom.px**2+mom.py**2+mom.pz**2)) + beta_vec = beta*unit + bx = beta_vec.px + by = beta_vec.py + bz = beta_vec.pz + gamma = 1.0 / math.sqrt(1.0-beta**2) + + out = FourMomentum([gamma*self.E - gamma*bx*self.px - gamma*by*self.py - gamma*bz*self.pz, + -gamma*bx*self.E + (1.0 + (gamma-1.0)*bx**2/(beta**2))*self.px + (gamma-1.0)*bx*by/(beta**2)*self.py + (gamma-1.0)*bx*bz/(beta**2)*self.pz, + -gamma*by*self.E + ((gamma-1.0)*bx*by/(beta**2))*self.px + (1.0+(gamma-1.0)*(by**2)/(beta**2))*self.py + (gamma-1.0)*by*bz/(beta**2)*self.pz, + -gamma*bz*self.E + (gamma-1.0)*bx*bz/(beta**2)*self.px + (gamma-1.0)*(by*bz)/(beta**2)*self.py + (1.0+(gamma-1.0)*bz**2/(beta**2))*self.pz]) + + return out + def boost_to_restframe(self, pboost): """apply the boost transformation such that pboost is at rest in the new frame. First apply a rotation to allign the pboost to the z axis and then use @@ -2789,27 +3168,64 @@ def boost_to_restframe(self, pboost): return out - # write pboost as (E, p cosT sinF, p sinT sinF, p cosF) - # rotation such that it become (E, 0 , 0 , p ) is - # cosT sinF , -sinT , cosT sinF - # sinT cosF , cosT , sinT sinF - # -sinT , 0 , cosF - p = math.sqrt( pboost.px**2 + pboost.py**2+ pboost.pz**2) - cosF = pboost.pz / p - sinF = math.sqrt(1-cosF**2) - sinT = pboost.py/p/sinF - cosT = pboost.px/p/sinF - - out=FourMomentum([self.E, - self.px*cosT*cosF + self.py*sinT*cosF-self.pz*sinF, - -self.px*sinT+ self.py*cosT, - self.px*cosT*sinF + self.py*sinT*sinF + self.pz*cosF - ]) - out = out.zboost(E=pboost.E,pz=p) + # see here https://physics.stackexchange.com/questions/749036/general-lorentz-boost-of-four-momentum-in-cm-frame-particle-physics + vx = pboost.px/pboost.E + vy = pboost.py/pboost.E + vz = pboost.pz/pboost.E + v = pboost.norm/pboost.E + v2 = pboost.norm_sq/pboost.E**2 + gamma = 1./math.sqrt(1.-v**2) + gammo = gamma-1. 
+ out = FourMomentum(E = gamma*(self.E - vx*self.px - vy*self.py - vz*self.pz), + px= -gamma*vx*self.E + (1+gammo*vx**2/v2)*self.px + gammo*vx*vy/v2*self.py + gammo*vx*vz/v2*self.pz, + py= -gamma*vy*self.E + gammo*vy*vx/v2*self.px + (1+gammo*vy**2/v2)*self.py + gammo*vy*vz/v2*self.pz, + pz= -gamma*vz*self.E + gammo*vz*vx/v2*self.px + gammo*vz*vy/v2*self.py + (1+gammo*vz**2/v2)*self.pz) + return out + def rotate_to_z(self,prot): + + import math + import numpy as np + + z = np.array([0.,0.,1.]) + + px = self.px + py = self.py + pz = self.pz + + refx = prot.px + refy = prot.py + refz = prot.pz + + prot_mom = np.array([px, py, pz]) + ref_mom = np.array([refx, refy, refz]) + + # Create normal vector + n = np.array([refy, -refx, 0.]) + n = n * 1./math.sqrt(self.threedot(n,n)) + t = prot_mom - self.threedot(n,prot_mom)*n + p = ref_mom - self.threedot(ref_mom,z)*z + p = p/math.sqrt(self.threedot(p,p)) + + t_pz = np.array([self.threedot(t,p), self.threedot(t,z), 0.]) + costheta = self.threedot(ref_mom,z)* 1./math.sqrt(self.threedot(ref_mom, ref_mom)) + sintheta=math.sqrt(1.-costheta**2) + + sgn = 1. + t_pz_p = np.array([0., 0., 0.]) + t_pz_p[0] = costheta*t_pz[0] + sgn*(-sintheta) * t_pz[1] + t_pz_p[1] = sgn*sintheta*t_pz[0] + costheta * t_pz[1] + + out_mom = self.threedot(n,prot_mom)*n + t_pz_p[0]*p + t_pz_p[1]*z + + out = FourMomentum([self.E,out_mom[0], out_mom[1], out_mom[2] ] ) + + return out - + def threedot(self,a,b): + + return a[0]*b[0]+a[1]*b[1]+a[2]*b[2] class OneNLOWeight(object): diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/madevent_interface.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/madevent_interface.py index 2a118e21bf..8e30cf690c 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/madevent_interface.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/madevent_interface.py @@ -496,7 +496,6 @@ def help_remove(self): logger.info(" the optional '-f' allows to by-pass all security question") logger.info(" The banner can be remove only if all files are removed first.") - class AskRun(cmd.ControlSwitch): """a class for the question on what to do on a madevent run""" @@ -2393,13 +2392,17 @@ def do_generate_events(self, line): # Check argument's validity mode = self.check_generate_events(args) switch_mode = self.ask_run_configuration(mode, args) - if not args: - # No run name assigned -> assigned one automaticaly - self.set_run_name(self.find_available_run_name(self.me_dir), None, 'parton') - else: - self.set_run_name(args[0], None, 'parton', True) - args.pop(0) - + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + run_card = banner_mod.RunCard(pjoin(self.me_dir, 'Cards', 'run_card.dat'), consistency=False) + if not run_card.scan_set: + if not args: + # No run name assigned -> assigned one automaticaly + self.set_run_name(self.find_available_run_name(self.me_dir), None, 'parton') + else: + self.set_run_name(args[0], None, 'parton', True) + args.pop(0) + + self.run_generate_events(switch_mode, args) self.postprocessing() @@ -2560,7 +2563,7 @@ def wait_monitoring(Idle, Running, Done): self.update_status("postprocessing contur done", level="rivet") # this decorator handle the loop related to scan. 
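(Annotation on the change below, not part of the patch: switching the decorator to scanparamcardhandling(run_card_scan=True) lets the same machinery that loops over param_card scan points also loop over run_card points. Assuming the run_card accepts the same scan syntax as the param_card, a scan would be declared in Cards/run_card.dat as, e.g.:

    scan:[10000, 50000] = nevents

which is also why do_generate_events above only calls set_run_name when the probed RunCard has no scan_set, leaving the run-name assignment to the per-point loop.)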
- @common_run.scanparamcardhandling() + @common_run.scanparamcardhandling(run_card_scan=True) def run_generate_events(self, switch_mode, args): if self.proc_characteristics['loop_induced'] and self.options['run_mode']==0: @@ -2593,7 +2596,6 @@ def run_generate_events(self, switch_mode, args): # Regular run mode logger.info('Generating %s events with run name %s' % (self.run_card['nevents'], self.run_name)) - self.exec_cmd('survey %s %s' % (self.run_name,' '.join(args)), postcmd=False) nb_event = self.run_card['nevents'] @@ -2975,7 +2977,7 @@ def update_width_in_param_card(decay_info, initial=None, output=None): particle = 0 # Read BRs for this decay line = param_card[line_number] - while re.search('^(#|\s|\d)', line): + while re.search(r'^(#|\s|\d)', line): line = param_card.pop(line_number) if not particle or line.startswith('#'): line=param_card[line_number] @@ -3226,7 +3228,7 @@ def do_treatcards(self, line, mode=None, opt=None): for line in open(pjoin(bias_module_path,'%s.f'%os.path.basename(bias_module_path))): if start and last: break - if not start and not re.search('c\s*parameters\s*=\s*{',line, re.I): + if not start and not re.search(r'c\s*parameters\s*=\s*{',line, re.I): continue start = True if not line.startswith('C'): @@ -3235,7 +3237,7 @@ def do_treatcards(self, line, mode=None, opt=None): if '{' in line: line = line.split('{')[-1] # split for } ! # - split_result = re.split('(\}|!|\#)', line,1, re.M) + split_result = re.split(r'(\}|!|\#)', line,1, re.M) line = split_result[0] sep = split_result[1] if len(split_result)>1 else None if sep == '}': @@ -3514,8 +3516,8 @@ def pass_in_difficult_integration_mode(self, rate=1): text = open(conf_path).read() min_evt, max_evt = 2500 *(2+rate), 10000*(rate+1) - text = re.sub('''\(min_events = \d+\)''', '(min_events = %i )' % min_evt, text) - text = re.sub('''\(max_events = \d+\)''', '(max_events = %i )' % max_evt, text) + text = re.sub(r'''\(min_events = \d+\)''', '(min_events = %i )' % min_evt, text) + text = re.sub(r'''\(max_events = \d+\)''', '(max_events = %i )' % max_evt, text) fsock = open(conf_path, 'w') fsock.write(text) fsock.close() @@ -3619,7 +3621,7 @@ def do_refine(self, line): alljobs = misc.glob('ajob*', Pdir) #remove associated results.dat (ensure to not mix with all data) - Gre = re.compile("\s*j=(G[\d\.\w]+)") + Gre = re.compile(r"\s*j=(G[\d\.\w]+)") for job in alljobs: Gdirs = Gre.findall(open(job).read()) for Gdir in Gdirs: @@ -3727,58 +3729,126 @@ def do_combine_events(self, line): sum_xsec, sum_xerru, sum_axsec = 0,[],0 Gdirs = self.get_Gdir() Gdirs.sort() - for Gdir in Gdirs: - if os.path.exists(pjoin(Gdir, 'events.lhe')): - result = sum_html.OneResult('') - result.read_results(pjoin(Gdir, 'results.dat')) - sum_xsec += result.get('xsec') - sum_xerru.append(result.get('xerru')) - sum_axsec += result.get('axsec') - - if self.run_card['gridpack'] or self.run_card['nevents']==0: - os.remove(pjoin(Gdir, 'events.lhe')) - continue + partials_info = [] + try: + p = subprocess.Popen(["ulimit", "-n"], stdout=subprocess.PIPE) + out, err = p.communicate() + max_G = out.decode() + if max_G == "unlimited": + max_G =2500 + else: + max_G = int(max_G) - 40 + except Exception as error: + logger.debug(error) + max_G = 80 # max(20, len(Gdirs)/self.options['nb_core']) - AllEvent.add(pjoin(Gdir, 'events.lhe'), - result.get('xsec'), - result.get('xerru'), - result.get('axsec') - ) - - if len(AllEvent) >= 80: #perform a partial unweighting - AllEvent.unweight(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % partials), - 
get_wgt, log_level=5, trunc_error=1e-2, event_target=self.run_card['nevents']) - AllEvent = lhe_parser.MultiEventFile() - AllEvent.banner = self.banner - AllEvent.add(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % partials), - sum_xsec, - math.sqrt(sum(x**2 for x in sum_xerru)), - sum_axsec) - partials +=1 - if not hasattr(self,'proc_characteristic'): self.proc_characteristic = self.get_characteristics() - if len(AllEvent) == 0: - nb_event = 0 - else: + mycluster = cluster.MultiCore(nb_core=self.options['nb_core']) + + def split(a, n): + """split a list "a" into n chunk of same size (or nearly same size)""" + k, m = divmod(len(a), n) + return (a[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n)) + + partials_info = [] + if len(Gdirs) >= max_G: + start_unweight= time.perf_counter() + # first check in how many chunk we have to split (always use a multiple of nb_core) + nb_split = 1 + nb_G = len(Gdirs) // (2* self.options['nb_core']) + while nb_G > min(80, max_G): + nb_split += 1 + nb_G = len(Gdirs)//(nb_split*2*self.options['nb_core']) + if nb_G < 10: + nb_split -=1 + nb_G = len(Gdirs)//(nb_split*2*self.options['nb_core']) + + #enforce at least 10 directory per thread + if nb_G > 10 or nb_split>1: + # do the unweighting of each chunk on their own thread + nb_chunk = (nb_split*2*self.options['nb_core']) + else: + nb_chunk = len(Gdirs) // 10 + nb_G =10 + + # security that the number of combine events is too large + if nb_chunk >= max_G: + nb_chunk = max_G -1 + nb_G = len(Gdirs) // nb_chunk + + for i, local_G in enumerate(split(Gdirs, nb_chunk)): + line = [pjoin(self.me_dir, "Events", self.run_name, "partials%d.lhe.gz" % i)] + line.append(pjoin(self.me_dir, 'Events', self.run_name, '%s_%s_banner.txt' % (self.run_name, tag))) + line.append(str(self.results.current['cross'])) + line += local_G + partials_info.append(self.do_combine_events_partial(' '.join(line), preprocess_only=True)) + mycluster.submit(sys.executable, + [pjoin(self.me_dir, 'bin', 'internal', 'madevent_interface.py'), 'combine_events_partial'] + line, + stdout='/dev/null' + ) + + starttime = time.time() + update_status = lambda idle, run, finish: \ + self.update_status((idle, run, finish, 'unweight'), level=None, + force=False, starttime=starttime) + mycluster.wait(self.me_dir, update_status) + # do the final combination + for data in partials_info: + AllEvent.add(*data) + + start_unweight= time.perf_counter() nb_event = AllEvent.unweight(pjoin(self.me_dir, "Events", self.run_name, "unweighted_events.lhe.gz"), get_wgt, trunc_error=1e-2, event_target=self.run_card['nevents'], log_level=logging.DEBUG, normalization=self.run_card['event_norm'], proc_charac=self.proc_characteristic) + + #cleaning + for data in partials_info: + path = data[0] + try: + os.remove(path) + except Exception as error: + try: + os.remove(path[:-3]) # try without the .gz + except: + misc.sprint('no file ', path, 'to clean') + else: + for Gdir in Gdirs: + if os.path.exists(pjoin(Gdir, 'events.lhe')): + result = sum_html.OneResult('') + result.read_results(pjoin(Gdir, 'results.dat')) + sum_xsec += result.get('xsec') + sum_xerru.append(result.get('xerru')) + sum_axsec += result.get('axsec') + + if self.run_card['gridpack'] or self.run_card['nevents']==0: + os.remove(pjoin(Gdir, 'events.lhe')) + continue + + AllEvent.add(pjoin(Gdir, 'events.lhe'), + result.get('xsec'), + result.get('xerru'), + result.get('axsec') + ) + + if len(AllEvent) == 0: + nb_event = 0 + else: + nb_event = AllEvent.unweight(pjoin(self.me_dir, "Events", self.run_name, 
"unweighted_events.lhe.gz"), + get_wgt, trunc_error=1e-2, event_target=self.run_card['nevents'], + log_level=logging.DEBUG, normalization=self.run_card['event_norm'], + proc_charac=self.proc_characteristic) + + if nb_event < self.run_card['nevents']: + logger.warning("failed to generate enough events. Please follow one of the following suggestions to fix the issue:") + logger.warning(" - set in the run_card.dat 'sde_strategy' to %s", 1 + self.run_card['sde_strategy'] % 2) + logger.warning(" - set in the run_card.dat 'hard_survey' to 1 or 2.") + logger.warning(" - reduce the number of requested events (if set too high)") + logger.warning(" - check that you do not have -integrable- singularity in your amplitude.") + logger.warning(" - regenerate your process directory by selecting another gauge (in particular try FD gauge).") - if nb_event < self.run_card['nevents']: - logger.warning("failed to generate enough events. Please follow one of the following suggestions to fix the issue:") - logger.warning(" - set in the run_card.dat 'sde_strategy' to %s", 1 + self.run_card['sde_strategy'] % 2) - logger.warning(" - set in the run_card.dat 'hard_survey' to 1 or 2.") - logger.warning(" - reduce the number of requested events (if set too high)") - logger.warning(" - check that you do not have -integrable- singularity in your amplitude.") - if partials: - for i in range(partials): - try: - os.remove(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % i)) - except Exception: - os.remove(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe" % i)) self.results.add_detail('nb_event', nb_event) @@ -3789,7 +3859,50 @@ def do_combine_events(self, line): logger.info("combination of events done in %s s ", time.time()-start) self.to_store.append('event') + + ############################################################################ + def do_combine_events_partial(self, line, preprocess_only=False): + """ """ + + AllEvent = lhe_parser.MultiEventFile() + + sum_xsec, sum_xerru, sum_axsec = 0,[],0 + output, banner_path,cross = line.split()[:3] + Gdirs = line.split()[3:] + + cross = float(cross) + if not self.banner: + self.banner = banner_mod.Banner(banner_path) + if not hasattr(self, 'run_card'): + self.run_card = banner_mod.RunCard(self.banner['mgruncard']) + AllEvent.banner = self.banner + + for Gdir in Gdirs: + if os.path.exists(pjoin(Gdir, 'events.lhe')): + result = sum_html.OneResult('') + result.read_results(pjoin(Gdir, 'results.dat')) + sum_xsec += result.get('xsec') + sum_xerru.append(result.get('xerru')) + sum_axsec += result.get('axsec') + + if self.run_card['gridpack'] or self.run_card['nevents']==0: + os.remove(pjoin(Gdir, 'events.lhe')) + continue + if not preprocess_only: + AllEvent.add(pjoin(Gdir, 'events.lhe'), + result.get('xsec'), + result.get('xerru'), + result.get('axsec') + ) + if preprocess_only: + return output, sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), sum_axsec + nb_event = max(min(abs(1.01*self.run_card['nevents']*sum_axsec/cross),self.run_card['nevents']), 10) + get_wgt = lambda event: event.wgt + AllEvent.unweight(output, + get_wgt, log_level=5, trunc_error=1e-2, event_target=nb_event) + return output, sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), sum_axsec + ############################################################################ def correct_bias(self): """check the first event and correct the weight by the bias @@ -3902,13 +4015,19 @@ def do_store_events(self, line): #except Exception: # continue # Store log - try: - if 
os.path.exists(pjoin(G_path, 'log.txt')): - input = pjoin(G_path, 'log.txt') + input = pjoin(G_path, 'log.txt') + if os.path.exists(input): + if self.run_card['keep_log'] not in ["none", "minimal"]: output = pjoin(G_path, '%s_log.txt' % run) - files.mv(input, output) - except Exception: - continue + try: + files.mv(input, output) + except Exception: + continue + elif self.run_card['keep_log'] == "none": + try: + os.remove(input) + except Exception: + continue #try: # # Grid # for name in ['ftn26']: @@ -3989,7 +4108,7 @@ def do_create_gridpack(self, line): misc.call(['./bin/internal/make_gridpack'], cwd=self.me_dir) files.mv(pjoin(self.me_dir, 'gridpack.tar.gz'), pjoin(self.me_dir, '%s_gridpack.tar.gz' % self.run_name)) - os.system("sed -i.bak \"s/\s*.true.*=.*GridRun/ .false. = GridRun/g\" %s/Cards/grid_card.dat" \ + os.system("sed -i.bak \"s/\\s*.true.*=.*GridRun/ .false. = GridRun/g\" %s/Cards/grid_card.dat" \ % self.me_dir) self.update_status('gridpack created', level='gridpack') @@ -4476,7 +4595,7 @@ def do_pythia8(self, line): else: preamble = misc.get_HEPTools_location_setter( pjoin(MG5DIR,'HEPTools'),'lib') - preamble += "\n unset PYTHIA8DATA\n" + #preamble += "\n unset PYTHIA8DATA\n" open(pythia_cmd_card,'w').write("""! ! It is possible to run this card manually with: @@ -4691,7 +4810,7 @@ def do_pythia8(self, line): # Make sure to sure the number of split_events determined during the splitting. split_PY8_Card.systemSet('Main:numberOfEvents',partition_for_PY8[i]) split_PY8_Card.systemSet('HEPMCoutput:scaling',split_PY8_Card['HEPMCoutput:scaling']* - (float(partition_for_PY8[i])/float(n_events))) + (float(partition_for_PY8[i]))) # Add_missing set to False so as to be sure not to add any additional parameter w.r.t # the ones in the original PY8 param_card copied. split_PY8_Card.write(pjoin(parallelization_dir,'PY8Card_%d.dat'%i), @@ -4963,9 +5082,9 @@ def wait_monitoring(Idle, Running, Done): if cross_sections: # Filter the cross_sections specified an keep only the ones # with central parameters and a different merging scale - a_float_re = '[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' + a_float_re = r'[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' central_merging_re = re.compile( - '^\s*Weight_MERGING\s*=\s*(?P%s)\s*$'%a_float_re, + r'^\s*Weight_MERGING\s*=\s*(?P%s)\s*$'%a_float_re, re.IGNORECASE) cross_sections = dict( (float(central_merging_re.match(xsec).group('merging')),value) @@ -5016,8 +5135,8 @@ def wait_monitoring(Idle, Running, Done): def parse_PY8_log_file(self, log_file_path): """ Parse a log file to extract number of event and cross-section. 
""" - pythiare = re.compile("Les Houches User Process\(es\)\s*\d+\s*\|\s*(?P\d+)\s*(?P\d+)\s*(?P\d+)\s*\|\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") - pythia_xsec_re = re.compile("Inclusive cross section\s*:\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") + pythiare = re.compile(r"Les Houches User Process\(es\)\s*\d+\s*\|\s*(?P\d+)\s*(?P\d+)\s*(?P\d+)\s*\|\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") + pythia_xsec_re = re.compile(r"Inclusive cross section\s*:\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") sigma_m, Nacc, Ntry = None, None, None for line in misc.BackRead(log_file_path): info = pythiare.search(line) @@ -5158,7 +5277,7 @@ def do_pythia(self, line): # read the line from the bottom of the file #pythia_log = misc.BackRead(pjoin(self.me_dir,'Events', self.run_name, # '%s_pythia.log' % tag)) - pythiare = re.compile("\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") + pythiare = re.compile(r"\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") for line in misc.reverse_readline(pjoin(self.me_dir,'Events', self.run_name, '%s_pythia.log' % tag)): info = pythiare.search(line) @@ -5619,8 +5738,8 @@ def launch_job(self,exe, cwd=None, stdout=None, argument = [], remaining=0, input_files.append(self.get_pdf_input_filename()) #Find the correct ajob - Gre = re.compile("\s*j=(G[\d\.\w]+)") - origre = re.compile("grid_directory=(G[\d\.\w]+)") + Gre = re.compile(r"\s*j=(G[\d\.\w]+)") + origre = re.compile(r"grid_directory=(G[\d\.\w]+)") try : fsock = open(exe) except Exception: @@ -5628,7 +5747,7 @@ def launch_job(self,exe, cwd=None, stdout=None, argument = [], remaining=0, text = fsock.read() output_files = Gre.findall(text) if not output_files: - Ire = re.compile("for i in ([\d\.\s]*) ; do") + Ire = re.compile(r"for i in ([\d\.\s]*) ; do") data = Ire.findall(text) data = ' '.join(data).split() for nb in data: @@ -6035,7 +6154,7 @@ def get_last_tag(self, level): 'syscalc':[], 'rivet':['rivet']} - if name == self.run_name: + if name and name == self.run_name: if reload_card: run_card = pjoin(self.me_dir, 'Cards','run_card.dat') self.run_card = banner_mod.RunCard(run_card) @@ -6334,7 +6453,7 @@ def run_syscalc(self, mode='parton', event_path=None, output=None): elif mode == 'Pythia': stdout = open(pjoin(event_dir, self.run_name, '%s_%s_syscalc.log' % (tag,mode)),'w') if 'mgpythiacard' in self.banner: - pat = re.compile('''^\s*qcut\s*=\s*([\+\-\d.e]*)''', re.M+re.I) + pat = re.compile(r'''^\s*qcut\s*=\s*([\+\-\d.e]*)''', re.M+re.I) data = pat.search(self.banner['mgpythiacard']) if data: qcut = float(data.group(1)) @@ -6611,7 +6730,7 @@ def get_subP_ids(path): for line in open(pjoin(path, 'leshouche.inc')): if not 'IDUP' in line: continue - particles = re.search("/([\d,-]+)/", line) + particles = re.search(r"/([\d,-]+)/", line) all_ids.append([int(p) for p in particles.group(1).split(',')]) return all_ids @@ -6899,6 +7018,7 @@ def do_combine_events(self, line): partials = 0 # if too many file make some partial unweighting sum_xsec, sum_xerru, sum_axsec = 0,[],0 + partials_info = [] Gdirs = self.get_Gdir() Gdirs.sort() for Gdir in Gdirs: @@ -6917,16 +7037,21 @@ def do_combine_events(self, line): sum_axsec += result.get('axsec')*gscalefact[Gdir] if len(AllEvent) >= 80: #perform a partial unweighting + nb_event = min(abs(1.01*self.nb_event*sum_axsec/self.results.current['cross']),self.run_card['nevents']) AllEvent.unweight(pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), - get_wgt, log_level=5, trunc_error=1e-2, event_target=self.nb_event) + 
get_wgt, log_level=5, trunc_error=1e-2, event_target=nb_event) AllEvent = lhe_parser.MultiEventFile() AllEvent.banner = self.banner - AllEvent.add(pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), + partials_info.append((pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), - sum_axsec) + sum_axsec) ) + sum_xsec, sum_xerru, sum_axsec = 0,[],0 partials +=1 + for data in partials_info: + AllEvent.add(*data) + if not hasattr(self,'proc_characteristic'): self.proc_characteristic = self.get_characteristics() diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/misc.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/misc.py index c4c669f36b..e7fd60be0d 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/misc.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/misc.py @@ -67,7 +67,7 @@ def parse_info_str(fsock): """ info_dict = {} - pattern = re.compile("(?P\w*)\s*=\s*(?P.*)", + pattern = re.compile(r"(?P\w*)\s*=\s*(?P.*)", re.IGNORECASE | re.VERBOSE) for entry in fsock: entry = entry.strip() @@ -84,7 +84,7 @@ def parse_info_str(fsock): def glob(name, path=''): """call to glob.glob with automatic security on path""" import glob as glob_module - path = re.sub('(?P\?|\*|\[|\])', '[\g]', path) + path = re.sub(r'(?P\?|\*|\[|\])', r'[\g]', path) return glob_module.glob(pjoin(path, name)) #=============================================================================== @@ -614,10 +614,10 @@ def mod_compilator(directory, new='gfortran', current=None, compiler_type='gfort #search file file_to_change=find_makefile_in_dir(directory) if compiler_type == 'gfortran': - comp_re = re.compile('^(\s*)FC\s*=\s*(.+)\s*$') + comp_re = re.compile(r'^(\s*)FC\s*=\s*(.+)\s*$') var = 'FC' elif compiler_type == 'cpp': - comp_re = re.compile('^(\s*)CXX\s*=\s*(.+)\s*$') + comp_re = re.compile(r'^(\s*)CXX\s*=\s*(.+)\s*$') var = 'CXX' else: MadGraph5Error, 'Unknown compiler type: %s' % compiler_type @@ -861,9 +861,9 @@ def detect_current_compiler(path, compiler_type='fortran'): # comp = re.compile("^\s*FC\s*=\s*(\w+)\s*") # The regular expression below allows for compiler definition with absolute path if compiler_type == 'fortran': - comp = re.compile("^\s*FC\s*=\s*([\w\/\\.\-]+)\s*") + comp = re.compile("^\\s*FC\\s*=\\s*([\\w\\/\\.\\-]+)\\s*") elif compiler_type == 'cpp': - comp = re.compile("^\s*CXX\s*=\s*([\w\/\\.\-]+)\s*") + comp = re.compile("^\\s*CXX\\s*=\\s*([\\w\\/\\.\\-]+)\\s*") else: MadGraph5Error, 'Unknown compiler type: %s' % compiler_type @@ -1001,7 +1001,19 @@ def call_stdout(arg, *args, **opt): def copytree(src, dst, symlinks = False, ignore = None): if not os.path.exists(dst): os.makedirs(dst) - shutil.copystat(src, dst) + try: + shutil.copystat(src, dst) + except PermissionError: + if os.path.realpath(src).startswith('/cvmfs') and os.path.realpath(dst).startswith('/afs'): + # allowing missmatch from cvmfs to afs since sounds to not create issue --at least in general-- + logger.critical(f'Ignoring that we could not copy permissions from {src} to {dst}') + else: + logger.critical(f'Permission error detected from {src} to {dst}.\n'+\ + 'If you are using WSL with windows partition, please try using python3.12\n'+\ + 'or avoid moving your data from the WSL partition to the UNIX one') + # we do not have enough experience in WSL to allow it to get trough. + raise + lst = os.listdir(src) if ignore: excl = ignore(src, lst) @@ -1895,12 +1907,12 @@ class EasterEgg(object): May4_banner = "* _____ *\n" + \ "* ,-~\" \"~-. *\n" + \ "* * ,^ ___ ^. 
* *\n" + \ - "* * / .^ ^. \ * *\n" + \ + "* * / .^ ^. \\ * *\n" + \ "* * Y l o ! Y * *\n" + \ "* * l_ `.___.' _,[ * *\n" + \ "* * |^~\"--------------~\"\"^| * *\n" + \ "* * ! May the 4th ! * *\n" + \ - "* * \ / * *\n" + \ + "* * \\ / * *\n" + \ "* * ^. .^ * *\n" + \ "* * \"-.._____.,-\" * *\n" @@ -1909,13 +1921,13 @@ class EasterEgg(object): "* M::::::::::M M::::::::::M *\n" + \ "* M:::::::::::M M:::::::::::M (_)___ *\n" + \ "* M:::::::M::::M M::::M:::::::M | / __| *\n" + \ - "* M::::::M M::::M M::::M M::::::M | \__ \ *\n" + \ + "* M::::::M M::::M M::::M M::::::M | \\__ \\ *\n" + \ "* M::::::M M::::M::::M M::::::M |_|___/ *\n" + \ "* M::::::M M:::::::M M::::::M *\n" + \ "* M::::::M M:::::M M::::::M / _| ___ _ __ *\n" + \ - "* M::::::M MMMMM M::::::M | |_ / _ \| '__| *\n" + \ + "* M::::::M MMMMM M::::::M | |_ / _ \\| '__| *\n" + \ "* M::::::M M::::::M | _| (_) | | *\n" + \ - "* M::::::M M::::::M |_/\/\___/|_| *\n" + \ + "* M::::::M M::::::M |_/\\/\\___/|_| *\n" + \ "* M::::::M M::::::M *\n" + \ "* MMMMMMMM MMMMMMMM *\n" + \ "* *\n" + \ @@ -2233,39 +2245,51 @@ def import_python_lhapdf(lhapdfconfig): os.environ['LD_LIBRARY_PATH'] = lhapdf_libdir else: os.environ['LD_LIBRARY_PATH'] = '%s:%s' %(lhapdf_libdir,os.environ['LD_LIBRARY_PATH']) - try: candidates=[dirname for dirname in os.listdir(lhapdf_libdir) \ if os.path.isdir(os.path.join(lhapdf_libdir,dirname))] except OSError: candidates=[] + if os.path.isdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')): + candidates += [pjoin(os.pardir,'local', 'lib', dirname) for dirname in os.listdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')) + if os.path.isdir(os.path.join(lhapdf_libdir,os.pardir, 'local', 'lib', dirname))] for candidate in candidates: - if os.path.isdir(os.path.join(lhapdf_libdir,candidate,'site-packages')): - sys.path.insert(0,os.path.join(lhapdf_libdir,candidate,'site-packages')) - try: - import lhapdf - use_lhapdf=True - break - except ImportError: - sys.path.pop(0) - continue + for subdir in ['site-packages', 'dist-packages']: + if os.path.isdir(os.path.join(lhapdf_libdir,candidate, subdir)): + sys.path.insert(0,os.path.join(lhapdf_libdir,candidate, subdir)) + try: + import lhapdf + use_lhapdf=True + break + except ImportError as error: + sys.path.pop(0) + continue + else: + continue + break if not use_lhapdf: try: candidates=[dirname for dirname in os.listdir(lhapdf_libdir+'64') \ if os.path.isdir(os.path.join(lhapdf_libdir+'64',dirname))] except OSError: candidates=[] - + if os.path.isdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib64')): + candidates += [pjoin(os.pardir,'local', 'lib64', dirname) for dirname in os.listdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')) + if os.path.isdir(os.path.join(lhapdf_libdir,os.pardir, 'local', 'lib64', dirname))] for candidate in candidates: - if os.path.isdir(os.path.join(lhapdf_libdir+'64',candidate,'site-packages')): - sys.path.insert(0,os.path.join(lhapdf_libdir+'64',candidate,'site-packages')) - try: - import lhapdf - use_lhapdf=True - break - except ImportError: - sys.path.pop(0) - continue + for subdir in ['site-packages', 'dist-packages']: + if os.path.isdir(os.path.join(lhapdf_libdir+'64',candidate, subdir)): + sys.path.insert(0,os.path.join(lhapdf_libdir+'64',candidate, subdir)) + try: + import lhapdf + use_lhapdf=True + break + except ImportError as error: + sys.path.pop(0) + continue + else: + continue + break if not use_lhapdf: try: import lhapdf diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/shower_card.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/shower_card.py 
index e87d534177..16ed72b1a0 100755 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/shower_card.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/shower_card.py @@ -288,7 +288,7 @@ def write(self, output_file, template=None, python_template=False, self.text = open(template,'r').read() - key_re = re.compile('^(\s*)([\S^#]+)(\s*)=(\s*)([^#]*?)(\s*)(\#.*|$)') + key_re = re.compile(r'^(\s*)([\S^#]+)(\s*)=(\s*)([^#]*?)(\s*)(\#.*|$)') newlines = [] for line in self.text.split('\n'): key_match = key_re.findall(line) diff --git a/epochX/cudacpp/gq_ttq.mad/bin/internal/systematics.py b/epochX/cudacpp/gq_ttq.mad/bin/internal/systematics.py index 28eaed00e2..2acebd087c 100644 --- a/epochX/cudacpp/gq_ttq.mad/bin/internal/systematics.py +++ b/epochX/cudacpp/gq_ttq.mad/bin/internal/systematics.py @@ -582,7 +582,7 @@ def print_cross_sections(self, all_cross, nb_event, stdout): else: resume.write( '#PDF %s: %g +%2.3g%% -%2.3g%%\n' % (pdfset.name, pdferr.central,pdferr.errplus*100/all_cross[0], pdferr.errminus*100/all_cross[0])) - dyn_name = {1: '\sum ET', 2:'\sum\sqrt{m^2+pt^2}', 3:'0.5 \sum\sqrt{m^2+pt^2}',4:'\sqrt{\hat s}' } + dyn_name = {1: r'\sum ET', 2:r'\sum\sqrt{m^2+pt^2}', 3:r'0.5 \sum\sqrt{m^2+pt^2}',4:r'\sqrt{\hat s}' } for key, curr in dyns.items(): if key ==-1: continue @@ -789,7 +789,7 @@ def get_id(self): return int(self.start_wgt_id) if 'initrwgt' in self.banner: - pattern = re.compile(' set lhapdf /PATH/TO/lhapdf-config -None does not seem to correspond to a valid lhapdf-config executable. -Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). -Note that you can still compile and run aMC@NLO with the built-in PDFs - MG5_aMC> set lhapdf /PATH/TO/lhapdf-config - Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -61,7 +56,7 @@ set zerowidth_tchannel F define q = u c d s u~ c~ d~ s~ INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005497932434082031  +DEBUG: model prefixing takes 0.0057294368743896484  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -170,17 +165,16 @@ INFO: Crossed process found for g u~ > t t~ u~, reuse diagrams. INFO: Crossed process found for g c~ > t t~ c~, reuse diagrams. INFO: Crossed process found for g d~ > t t~ d~, reuse diagrams. INFO: Crossed process found for g s~ > t t~ s~, reuse diagrams. -8 processes with 40 diagrams generated in 0.079 s +8 processes with 40 diagrams generated in 0.080 s Total: 8 processes with 40 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_gq_ttq Load PLUGIN.CUDACPP_OUTPUT -Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.5.3_lo_vect. +Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT -DEBUG: cformat =  plugin [export_cpp.py at line 3070]  DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 165]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g u > t t~ u WEIGHTED<=3 @1 INFO: Processing color information for process: g u > t t~ u @1 @@ -197,40 +191,40 @@ INFO: Combined process g s~ > t t~ s~ WEIGHTED<=3 @1 with process g u~ > t t~ u~ DEBUG: type(fortran_model)= [output.py at line 214]  DEBUG: type(me)= me=0 [output.py at line 215]  DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 216]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/. +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gu_ttxu/. 
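Note: most of the Python-side hunks in this patch make one mechanical change, regex literals gain an r prefix. Python 3.12 turns invalid string escapes such as "\s" into a visible SyntaxWarning (deprecated since 3.6), so moving every pattern to a raw string keeps the code warning-free without changing its meaning. A minimal sketch, reusing the Gre pattern from the launch_job hunk earlier in this patch (the assertions are illustrative, not code from the patch):

    import re

    # Without the r prefix, "\s" must first survive *string* escaping;
    # Python 3.12+ emits a SyntaxWarning for such invalid escapes.
    legacy = re.compile("\\s*j=(G[\\d\\.\\w]+)")   # explicit doubling also silences it
    modern = re.compile(r"\s*j=(G[\d\.\w]+)")      # raw string: backslashes pass through

    assert legacy.pattern == modern.pattern        # identical regex, cleaner source
    assert modern.match("  j=G12.3").group(1) == "G12.3"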
DEBUG: Entering PLUGIN_ProcessExporter.generate_subprocess_directory (create the directory) [output.py at line 212]  DEBUG: type(subproc_group)= [output.py at line 213]  DEBUG: type(fortran_model)= [output.py at line 214]  DEBUG: type(me)= me=1 [output.py at line 215]  DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 216]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/SubProcesses/P1_Sigma_sm_gux_ttxux/. Generated helas calls for 2 subprocesses (10 diagrams) in 0.031 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVV1 routines -ALOHA: aloha creates 2 routines in 0.145 s +ALOHA: aloha creates 2 routines in 0.147 s FFV1 FFV1 FFV1 FFV1 VVV1 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. 
super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_gq_ttq/src/. quit -real 0m0.756s -user 0m0.592s -sys 0m0.059s +real 0m0.658s +user 0m0.600s +sys 0m0.052s Code generation completed in 1 seconds diff --git a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt index b5c70a5837..0d7d52c915 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt +++ b/epochX/cudacpp/heft_gg_bb.mad/CODEGEN_mad_heft_gg_bb_log.txt @@ -14,7 +14,7 @@ Running MG5 in debug mode * * * * * * * * * * * * -* VERSION 3.5.3_lo_vect 2023-12-23 * +* VERSION 3.6.0_lo_vect 2024-06-17 * * * * WARNING: UNKNOWN DEVELOPMENT VERSION. * * WARNING: DO NOT USE FOR PRODUCTION * @@ -45,22 +45,17 @@ Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (inclu Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config -None does not seem to correspond to a valid lhapdf-config executable. -Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). -Note that you can still compile and run aMC@NLO with the built-in PDFs - MG5_aMC> set lhapdf /PATH/TO/lhapdf-config - Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 set zerowidth_tchannel F set auto_convert_model T save options auto_convert_model -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt import model heft INFO: Restrict model heft with file models/heft/restrict_default.dat . DEBUG: Simplifying conditional expressions  @@ -131,20 +126,20 @@ INFO: Process has 4 diagrams Total: 1 processes with 4 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_heft_gg_bb --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT -Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.5.3_lo_vect. +Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT +DEBUG: opt['output_options']['vector_size'] =  32 [export_v4.py at line 4334]  Output will be done with PLUGIN: CUDACPP_OUTPUT -DEBUG: cformat =  standalone_simd [export_cpp.py at line 3070]  DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 165]  INFO: initialize a new directory: CODEGEN_mad_heft_gg_bb INFO: remove old information in CODEGEN_mad_heft_gg_bb DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/SubProcesses  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Processing color information for process: g g > b b~ HIG<=1 HIW<=1 @1 @@ -154,68 +149,67 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
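Note: the do_combine_events hunk at the top of this section reorganises the partial-unweighting loop: once 80 result files have accumulated, the batch is unweighted into its own partials%s.lhe.gz, the running sums (sum_xsec, sum_xerru, sum_axsec) are reset, the per-batch event target is rescaled by the accumulated cross section, and the partial files are registered with AllEvent only once after the loop instead of being re-added (and hence re-read) by every later batch. A toy sketch of that accumulate/flush/merge-once pattern, with plain summation standing in for the actual unweighting:

    def combine_in_batches(weights, batch_size=80):
        """Toy model of the new partials bookkeeping; summing a batch stands
        in for AllEvent.unweight() writing a partials%s.lhe.gz file."""
        batch, partials_info = [], []
        for w in weights:
            batch.append(w)
            if len(batch) >= batch_size:
                partials_info.append(sum(batch))  # flush the batch to a "partial"
                batch = []                        # reset the accumulators, as in the patch
        total = sum(batch)                        # whatever is left after the loop
        for partial in partials_info:             # each partial is merged exactly once
            total += partial
        return total

    assert combine_in_batches(list(range(1000))) == sum(range(1000))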
-DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2, 3, 4] [export_cpp.py at line 711]  -DEBUG: subproc_number =  0 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Finding symmetric diagrams for subprocess group gg_bbx -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/SubProcesses/P1_gg_bbx [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  4 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4} [model_handling.py at line 1548]  Generated helas calls for 1 subprocesses (4 diagrams) in 0.009 s -Wrote files for 12 helas calls in 0.087 s +Wrote files for 12 helas calls in 0.078 s +DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.265 s +ALOHA: aloha creates 4 routines in 0.270 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 8 routines in 0.251 s +ALOHA: aloha creates 8 routines in 0.257 s VVS3 VVV1 FFV1 FFV1 FFV1 FFS2 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/./HelAmps_heft.h -INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/./HelAmps_heft.h +INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/./Parameters_heft.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/./Parameters_heft.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/./Parameters_heft.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/./Parameters_heft.cc INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. 
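Note: the new DEBUG lines above (opt['output_options']['vector_size'] = 32, self.vector_size = 32) trace the --vector_size=32 option of the madevent_simd output through the exporter: events are handed to the SIMD/GPU matrix-element bridge in fixed-size vectors rather than one at a time. A schematic illustration of that paging, where split_into_pages is a made-up helper rather than code from the patch:

    def split_into_pages(events, vector_size=32):
        """Chop an event list into fixed-size vectors ("pages") of at most
        vector_size entries; the real bridge pads the final, shorter page."""
        return [events[i:i + vector_size]
                for i in range(0, len(events), vector_size)]

    pages = split_into_pages(list(range(100)), vector_size=32)
    assert [len(p) for p in pages] == [32, 32, 32, 4]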
If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common -patching file Source/genps.inc +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file SubProcesses/makefile -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/SubProcesses/P1_gg_bbx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/SubProcesses/P1_gg_bbx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f +Hunk #2 succeeded at 227 (offset 13 lines). DEBUG: p.returncode =  0 [output.py at line 258]  -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/README +/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/README Run "open index.html" to see more information about this process. quit -real 0m2.690s -user 0m1.914s -sys 0m0.257s -Code generation completed in 3 seconds +real 0m2.177s +user 0m1.895s +sys 0m0.262s +Code generation completed in 2 seconds ************************************************************ * * * W E L C O M E to * @@ -228,7 +222,7 @@ Code generation completed in 3 seconds * * * * * * * * * * * * -* VERSION 3.5.3_lo_vect * +* VERSION 3.6.0_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * @@ -236,9 +230,9 @@ Code generation completed in 3 seconds * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt @@ -258,7 +252,7 @@ launch in debug mode * * * * * * * * * * * * -* VERSION 3.5.3_lo_vect * +* VERSION 3.6.0_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * @@ -266,9 +260,9 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_heft_gg_bb/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt diff --git a/epochX/cudacpp/heft_gg_bb.mad/Cards/me5_configuration.txt b/epochX/cudacpp/heft_gg_bb.mad/Cards/me5_configuration.txt index cdeedc7863..4f5079f78a 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/heft_gg_bb.mad/Cards/me5_configuration.txt @@ -116,6 +116,7 @@ # cluster_type = condor # cluster_queue = madgraph # cluster_size = 150 +# cluster_walltime = # time in minutes for slurm and in seconds for condor (not supported for other schedulers) #! Path to a node directory to avoid direct writing on the central disk #! 
Note that condor clusters avoid direct writing by default (therefore this @@ -234,7 +235,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/heft_gg_bb.mad/Cards/param_card.dat b/epochX/cudacpp/heft_gg_bb.mad/Cards/param_card.dat index c9600c4da0..8e66ac1662 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/Cards/param_card.dat +++ b/epochX/cudacpp/heft_gg_bb.mad/Cards/param_card.dat @@ -1,5 +1,5 @@ ###################################################################### -## PARAM_CARD AUTOMATICALY GENERATED BY MG5 FOLLOWING UFO MODEL #### +## PARAM_CARD AUTOMATICALLY GENERATED BY MG5 FOLLOWING UFO MODEL #### ###################################################################### ## ## ## Width set on Auto will be computed following the information ## diff --git a/epochX/cudacpp/heft_gg_bb.mad/Cards/param_card_default.dat b/epochX/cudacpp/heft_gg_bb.mad/Cards/param_card_default.dat index c9600c4da0..8e66ac1662 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/Cards/param_card_default.dat +++ b/epochX/cudacpp/heft_gg_bb.mad/Cards/param_card_default.dat @@ -1,5 +1,5 @@ ###################################################################### -## PARAM_CARD AUTOMATICALY GENERATED BY MG5 FOLLOWING UFO MODEL #### +## PARAM_CARD AUTOMATICALLY GENERATED BY MG5 FOLLOWING UFO MODEL #### ###################################################################### ## ## ## Width set on Auto will be computed following the information ## diff --git a/epochX/cudacpp/heft_gg_bb.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/heft_gg_bb.mad/Cards/proc_card_mg5.dat index 92abae0d44..2e02c5ea0e 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/heft_gg_bb.mad/Cards/proc_card_mg5.dat @@ -8,7 +8,7 @@ #* * * * #* * #* * -#* VERSION 3.5.3_lo_vect 2023-12-23 * +#* VERSION 3.6.0_lo_vect 2024-06-17 * #* * #* WARNING: UNKNOWN DEVELOPMENT VERSION. * #* WARNING: DO NOT USE FOR PRODUCTION * diff --git a/epochX/cudacpp/heft_gg_bb.mad/Cards/reweight_card_default.dat b/epochX/cudacpp/heft_gg_bb.mad/Cards/reweight_card_default.dat index ace534ae02..5cc65fe095 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/Cards/reweight_card_default.dat +++ b/epochX/cudacpp/heft_gg_bb.mad/Cards/reweight_card_default.dat @@ -19,8 +19,16 @@ change mode NLO # Define type of Reweighting. For LO sample this command # has no effect since only "LO" mode is allowed. + +#uncomment if you do not want to overwrite the reweight file of Sudakov in rw_me +#change rwgt_dir /PATH_MG5_BRANCH/NAME_FOLDER + +#uncomment if you want to use Sudakov Reweight +#change include_sudakov True + launch + # SPECIFY A PATH OR USE THE SET COMMAND LIKE THIS: # set sminputs 1 130 # modify 1/alpha_EW diff --git a/epochX/cudacpp/heft_gg_bb.mad/Cards/run_card.dat b/epochX/cudacpp/heft_gg_bb.mad/Cards/run_card.dat index d1ec266fca..8af20dc4e4 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/Cards/run_card.dat +++ b/epochX/cudacpp/heft_gg_bb.mad/Cards/run_card.dat @@ -170,6 +170,7 @@ systematics = systematics_program ! none, systematics [python], SysCalc [depreceted, C++] ['--mur=0.5,1,2', '--muf=0.5,1,2', '--pdf=errorset'] = systematics_arguments ! 
see: https://cp3.irmp.ucl.ac.be/projects/madgraph/wiki/Systematics#Systematicspythonmodule + #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ diff --git a/epochX/cudacpp/heft_gg_bb.mad/Cards/run_card_default.dat b/epochX/cudacpp/heft_gg_bb.mad/Cards/run_card_default.dat index 5a50dbf7ed..cfef927481 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/Cards/run_card_default.dat +++ b/epochX/cudacpp/heft_gg_bb.mad/Cards/run_card_default.dat @@ -170,6 +170,7 @@ systematics = systematics_program ! none, systematics [python], SysCalc [depreceted, C++] ['--mur=0.5,1,2', '--muf=0.5,1,2', '--pdf=errorset'] = systematics_arguments ! see: https://cp3.irmp.ucl.ac.be/projects/madgraph/wiki/Systematics#Systematicspythonmodule + #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ diff --git a/epochX/cudacpp/heft_gg_bb.mad/MGMEVersion.txt b/epochX/cudacpp/heft_gg_bb.mad/MGMEVersion.txt index 9d3a5c0ba0..a806d3938c 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/MGMEVersion.txt +++ b/epochX/cudacpp/heft_gg_bb.mad/MGMEVersion.txt @@ -1 +1 @@ -3.5.3_lo_vect \ No newline at end of file +3.6.0_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/heft_gg_bb.mad/Source/DHELAS/aloha_functions.f b/epochX/cudacpp/heft_gg_bb.mad/Source/DHELAS/aloha_functions.f index 975725737f..47699fa614 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/Source/DHELAS/aloha_functions.f +++ b/epochX/cudacpp/heft_gg_bb.mad/Source/DHELAS/aloha_functions.f @@ -351,7 +351,7 @@ subroutine oxxxxx(p,fmass,nhel,nsf , fo) fo(6) = ip * sqm(abs(im)) else - pp = min(p(0),dsqrt(p(1)**2+p(2)**2+p(3)**2)) +c pp = min(p(0),dsqrt(p(1)**2+p(2)**2+p(3)**2)) sf(1) = dble(1+nsf+(1-nsf)*nh)*rHalf sf(2) = dble(1+nsf-(1-nsf)*nh)*rHalf omega(1) = dsqrt(p(0)+pp) @@ -2054,7 +2054,7 @@ subroutine CombineAmp(nb, ihels, iwfcts, W1, Wall, Amp) enddo return end - + subroutine CombineAmpS(nb, ihels, iwfcts, W1, Wall, Amp) integer nb ! size of the vectors diff --git a/epochX/cudacpp/heft_gg_bb.mad/Source/MODEL/couplings.f b/epochX/cudacpp/heft_gg_bb.mad/Source/MODEL/couplings.f index f3b620ab58..04d6bb5333 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/Source/MODEL/couplings.f +++ b/epochX/cudacpp/heft_gg_bb.mad/Source/MODEL/couplings.f @@ -10,18 +10,27 @@ SUBROUTINE COUP() PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + LOGICAL UPDATELOOP COMMON /TO_UPDATELOOP/UPDATELOOP INCLUDE 'input.inc' - INCLUDE '../vector.inc' + + INCLUDE 'coupl.inc' READLHA = .TRUE. 
INCLUDE 'intparam_definition.inc' CALL COUP1() + IF (UPDATELOOP) THEN + + CALL COUP2() + + ENDIF + C couplings needed to be evaluated points by points C - CALL COUP2(1) + CALL COUP3(1) RETURN END @@ -47,7 +56,11 @@ SUBROUTINE UPDATE_AS_PARAM(VECID) INCLUDE '../maxparticles.inc' INCLUDE '../cuts.inc' + + INCLUDE '../vector.inc' + + INCLUDE '../run.inc' DOUBLE PRECISION ALPHAS @@ -65,7 +78,7 @@ SUBROUTINE UPDATE_AS_PARAM(VECID) couplings needed to be evaluated points by points C ALL_G(VECID) = G - CALL COUP2(VECID) + CALL COUP3(VECID) RETURN END @@ -80,7 +93,9 @@ SUBROUTINE UPDATE_AS_PARAM2(MU_R2,AS2 ,VECID) INTEGER VECID INCLUDE 'model_functions.inc' INCLUDE 'input.inc' + INCLUDE '../vector.inc' + INCLUDE 'coupl.inc' DOUBLE PRECISION MODEL_SCALE COMMON /MODEL_SCALE/MODEL_SCALE diff --git a/epochX/cudacpp/heft_gg_bb.mad/Source/MODEL/couplings1.f b/epochX/cudacpp/heft_gg_bb.mad/Source/MODEL/couplings1.f index f0848107b5..dd11e8418e 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/Source/MODEL/couplings1.f +++ b/epochX/cudacpp/heft_gg_bb.mad/Source/MODEL/couplings1.f @@ -7,12 +7,13 @@ SUBROUTINE COUP1( ) IMPLICIT NONE INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' GC_74 = -((MDL_COMPLEXI*MDL_YB)/MDL_SQRT__2) END diff --git a/epochX/cudacpp/heft_gg_bb.mad/Source/MODEL/couplings2.f b/epochX/cudacpp/heft_gg_bb.mad/Source/MODEL/couplings2.f index e2f1fb6eab..30f3a04e3b 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/Source/MODEL/couplings2.f +++ b/epochX/cudacpp/heft_gg_bb.mad/Source/MODEL/couplings2.f @@ -2,19 +2,17 @@ c written by the UFO converter ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc - SUBROUTINE COUP2( VECID) + SUBROUTINE COUP2( ) IMPLICIT NONE - INTEGER VECID + INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' - GC_10(VECID) = -G - GC_11(VECID) = MDL_COMPLEXI*G - GC_13(VECID) = -(MDL_COMPLEXI*MDL_GH) END diff --git a/epochX/cudacpp/heft_gg_bb.mad/Source/MODEL/couplings3.f b/epochX/cudacpp/heft_gg_bb.mad/Source/MODEL/couplings3.f index bcfea764e7..e033f0a91c 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/Source/MODEL/couplings3.f +++ b/epochX/cudacpp/heft_gg_bb.mad/Source/MODEL/couplings3.f @@ -7,12 +7,13 @@ SUBROUTINE COUP3( VECID) IMPLICIT NONE INTEGER VECID INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' GC_10(VECID) = -G GC_11(VECID) = MDL_COMPLEXI*G diff --git a/epochX/cudacpp/heft_gg_bb.mad/Source/MODEL/makefile b/epochX/cudacpp/heft_gg_bb.mad/Source/MODEL/makefile index 0b24445a53..5a5681d220 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/Source/MODEL/makefile +++ b/epochX/cudacpp/heft_gg_bb.mad/Source/MODEL/makefile @@ -3,7 +3,7 @@ # Makefile for model library # # ---------------------------------------------------------------------------- - +# template models/template_files/makefile_madevent # Check for ../make_opts ifeq ($(wildcard ../make_opts), ../make_opts) include ../make_opts diff --git a/epochX/cudacpp/heft_gg_bb.mad/Source/MODEL/makeinc.inc b/epochX/cudacpp/heft_gg_bb.mad/Source/MODEL/makeinc.inc index 6e2743eac1..4a9e1b2cc5 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/Source/MODEL/makeinc.inc 
+++ b/epochX/cudacpp/heft_gg_bb.mad/Source/MODEL/makeinc.inc @@ -2,4 +2,4 @@ # written by the UFO converter ############################################################################# -MODEL = couplings.o lha_read.o printout.o rw_para.o model_functions.o couplings1.o couplings2.o \ No newline at end of file +MODEL = couplings.o lha_read.o printout.o rw_para.o model_functions.o couplings1.o couplings2.o couplings3.o \ No newline at end of file diff --git a/epochX/cudacpp/heft_gg_bb.mad/Source/MODEL/printout.f b/epochX/cudacpp/heft_gg_bb.mad/Source/MODEL/printout.f index 18b8f35b08..3e195b03a2 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/Source/MODEL/printout.f +++ b/epochX/cudacpp/heft_gg_bb.mad/Source/MODEL/printout.f @@ -1,3 +1,7 @@ +ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc +c written by the UFO converter +ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc + c************************************************************************ c** ** c** MadGraph/MadEvent Interface to FeynRules ** @@ -9,7 +13,7 @@ subroutine printout implicit none - include '../vector.inc' ! defines VECSIZE_MEMMAX + include '../vector.inc' ! VECSIZE_MEMMAX (needed by coupl.inc) include 'coupl.inc' ! needs VECSIZE_MEMMAX (defined in vector.inc) include 'input.inc' diff --git a/epochX/cudacpp/heft_gg_bb.mad/Source/PDF/eepdf.inc b/epochX/cudacpp/heft_gg_bb.mad/Source/PDF/eepdf.inc index a0183e49ee..d50d8c62b3 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/Source/PDF/eepdf.inc +++ b/epochX/cudacpp/heft_gg_bb.mad/Source/PDF/eepdf.inc @@ -4,6 +4,9 @@ integer n_ee parameter (n_ee = 4) ! arrays to store the components before combining them + ! note: this common block is overwritten very quickly; do not + ! use it outside of auto_dsig.f double precision ee_components(n_ee) common / to_ee_components / ee_components + diff --git a/epochX/cudacpp/heft_gg_bb.mad/Source/PDF/pdfwrap_emela.f b/epochX/cudacpp/heft_gg_bb.mad/Source/PDF/pdfwrap_emela.f index bce10819d5..da1e4e2276 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/Source/PDF/pdfwrap_emela.f +++ b/epochX/cudacpp/heft_gg_bb.mad/Source/PDF/pdfwrap_emela.f @@ -13,7 +13,7 @@ SUBROUTINE PDFWRAP DOUBLE PRECISION VALUE(20) REAL*8 ALPHASPDF EXTERNAL ALPHASPDF - ! PDFs with beamstrahlung use specific initialisation/evaluation +C PDFs with beamstrahlung use specific initialisation/evaluation LOGICAL HAS_BSTRAHL COMMON /TO_HAS_BS/ HAS_BSTRAHL diff --git a/epochX/cudacpp/heft_gg_bb.mad/Source/PDF/pdg2pdf.f b/epochX/cudacpp/heft_gg_bb.mad/Source/PDF/pdg2pdf.f index 46f321e66b..2e7343a69b 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/Source/PDF/pdg2pdf.f +++ b/epochX/cudacpp/heft_gg_bb.mad/Source/PDF/pdg2pdf.f @@ -108,7 +108,7 @@ double precision function pdg2pdf(ih,ipdg,beamid,x,xmu) else if (abs(ipart).eq.10) then ipart = sign(1,ipart) * 15 endif - pdg2pdf = 0d0 + pdg2pdf = 0d0 if (beamid.lt.0) then ih_local = ipart @@ -122,7 +122,7 @@ double precision function pdg2pdf(ih,ipdg,beamid,x,xmu) endif do i_ee = 1, n_ee ee_components(i_ee) = compute_eepdf(x,omx_ee(iabs(beamid)),xmu,i_ee,ipart,ih_local) - enddo + enddo pdg2pdf = ee_components(1) ! 
temporary to test pdf load c write(*,*), x, beamid ,omx_ee(iabs(beamid)),xmu,1,ipart,ih_local,pdg2pdf return diff --git a/epochX/cudacpp/heft_gg_bb.mad/Source/dsample.f b/epochX/cudacpp/heft_gg_bb.mad/Source/dsample.f index 09d482b45a..b0bc0547ad 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/Source/dsample.f +++ b/epochX/cudacpp/heft_gg_bb.mad/Source/dsample.f @@ -204,6 +204,9 @@ subroutine sample_full(ndim,ncall,itmax,itmin,dsig,ninvar,nconfigs,VECSIZE_USED) if (VECSIZE_USED.le.1) then all_fx(1) = dsig(all_p, all_wgt,0) + ivec=0 + ilock=0 + iwarp=1 else c Here "i" is the position in the full grid of the event do i=(iwarp-1)*WARP_SIZE+1, iwarp*warp_size diff --git a/epochX/cudacpp/heft_gg_bb.mad/Source/eepdf.inc b/epochX/cudacpp/heft_gg_bb.mad/Source/eepdf.inc index a0183e49ee..d50d8c62b3 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/Source/eepdf.inc +++ b/epochX/cudacpp/heft_gg_bb.mad/Source/eepdf.inc @@ -4,6 +4,9 @@ integer n_ee parameter (n_ee = 4) ! arrays to store the components before combining them + ! note: this common block is overwritten very quickly; do not + ! use it outside of auto_dsig.f double precision ee_components(n_ee) common / to_ee_components / ee_components + diff --git a/epochX/cudacpp/heft_gg_bb.mad/Source/genps.inc b/epochX/cudacpp/heft_gg_bb.mad/Source/genps.inc index af7e0efbce..ef78e7e812 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/Source/genps.inc +++ b/epochX/cudacpp/heft_gg_bb.mad/Source/genps.inc @@ -29,9 +29,8 @@ c parameter (max_host=99,maxplace=199,maxpoints=100,maxans=50) c************************************************************************* c Parameters for helicity sums in matrixN.f c************************************************************************* - REAL*8 LIMHEL -c PARAMETER(LIMHEL=1e-8) ! ME threshold for helicity filtering (Fortran default) - PARAMETER(LIMHEL=0) ! ME threshold for helicity filtering (force Fortran to mimic cudacpp, see #419) +c REAL*8 LIMHEL +c PARAMETER(LIMHEL=1e-8) -> now passed via the run_card.dat INTEGER MAXTRIES PARAMETER(MAXTRIES=25) C To pass the helicity configuration chosen by the DiscreteSampler to diff --git a/epochX/cudacpp/heft_gg_bb.mad/Source/run.inc b/epochX/cudacpp/heft_gg_bb.mad/Source/run.inc index 5433a23583..81c8df6bf2 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/Source/run.inc +++ b/epochX/cudacpp/heft_gg_bb.mad/Source/run.inc @@ -106,4 +106,7 @@ c double precision tmin_for_channel integer sde_strat ! 
1 means standard single diagram enhancement strategy, c 2 means approximation by the denominator of the propagator - common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat \ No newline at end of file + common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat +c + double precision limhel + common/to_limhel/limhel diff --git a/epochX/cudacpp/heft_gg_bb.mad/Source/run_card.inc b/epochX/cudacpp/heft_gg_bb.mad/Source/run_card.inc index 67af0f2051..1a1bc782bd 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/Source/run_card.inc +++ b/epochX/cudacpp/heft_gg_bb.mad/Source/run_card.inc @@ -88,6 +88,8 @@ DSQRT_SHAT = 0.000000000000000D+00 + LIMHEL = 0.000000000000000D+00 + PTJ = 2.000000000000000D+01 PTB = 0.000000000000000D+00 diff --git a/epochX/cudacpp/heft_gg_bb.mad/Source/setrun.f b/epochX/cudacpp/heft_gg_bb.mad/Source/setrun.f index 9e9ef7fdbd..dc6caef748 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/Source/setrun.f +++ b/epochX/cudacpp/heft_gg_bb.mad/Source/setrun.f @@ -162,9 +162,21 @@ subroutine setrun C Fill common block for Les Houches init info do i=1,2 if(lpp(i).eq.1.or.lpp(i).eq.2) then - idbmup(i)=2212 + if (nb_proton(i).eq.1.and.nb_neutron(i).eq.0) then + idbmup(i)=2212 + elseif (nb_proton(i).eq.0.and.nb_neutron(i).eq.1) then + idbmup(i)=2112 + else + idbmup(i) = 1000000000 + (nb_proton(i)+nb_neutron(i))*10 + $ + nb_proton(i)*10000 + endif elseif(lpp(i).eq.-1.or.lpp(i).eq.-2) then - idbmup(i)=-2212 + if (nb_proton(i).eq.1.and.nb_neutron(i).eq.0) then + idbmup(i)=-2212 + else + idbmup(i) = -1*(1000000000 + (nb_proton(i)+nb_neutron(i))*10 + $ + nb_proton(i)*10000) + endif elseif(lpp(i).eq.3) then idbmup(i)=11 elseif(lpp(i).eq.-3) then diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/MGVersion.txt b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/MGVersion.txt index 9d3a5c0ba0..a806d3938c 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/MGVersion.txt +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/MGVersion.txt @@ -1 +1 @@ -3.5.3_lo_vect \ No newline at end of file +3.6.0_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/CPPProcess.cc b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/CPPProcess.cc index fb54fdea37..1291d5924f 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/CPPProcess.cc +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/CPPProcess.h b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/CPPProcess.h index fcd8cce98a..bb2909d380 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/CPPProcess.h +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. 
//========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig.f b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig.f index 35195c387d..bdd8998a01 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig.f +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1134,11 +1134,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=16) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1148,32 +1149,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=16) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=16) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig1.f b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig1.f index 6a68e69402..a7b5c93585 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig1.f +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -101,6 +101,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -217,7 +219,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -281,7 +283,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -320,9 +322,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -338,6 +341,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -485,11 +490,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -589,9 +589,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -704,3 +706,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/driver.f b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/driver.f index 27a6e46742..ec5722702a 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/driver.f +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/matrix1.f b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/matrix1.f index 9995f8b768..4aa29f8152 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/matrix1.f +++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -23,6 +23,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=16) INTEGER NGRAPHS @@ -46,8 +48,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -56,26 +58,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -86,7 +85,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -96,26 +94,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,4) /-1,-1,-1, 1/ DATA (NHEL(I, 2),I=1,4) /-1,-1,-1,-1/ DATA (NHEL(I, 3),I=1,4) /-1,-1, 1, 1/ @@ -143,8 +140,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -153,11 +149,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=3 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=3 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -167,12 +163,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) @@ -184,7 +179,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -213,35 +209,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
+                NGOOD = NGOOD +1
+                PRINT *,'Added good helicity ',I, 'for process 1',TS(I)
+     $           *NCOMB/ANS,' in event ',NTRY(1)
               ENDIF
             ENDDO
           ENDIF
-          IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN
-            ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR))
+          IF(NTRY(1).EQ.MAXTRIES)THEN
+            ISHEL=MIN(ISUM_HEL,NGOOD)
 C           Print the number of good helicities
-            IF (NGOODHEL(IMIRROR).EQ.-1) THEN
-              NGOODHEL(IMIRROR)=0
+            IF (NGOODHEL.EQ.-1) THEN
+              NGOODHEL=0
               DO I=1,NCOMB
-                IF (GOODHEL(I,IMIRROR)) THEN
-                  NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1
+                IF (GOODHEL(I,1)) THEN
+                  NGOODHEL=NGOODHEL+1
                 ENDIF
               END DO
-              WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror?
+              WRITE (6,*) 'NGOODHEL =', NGOODHEL
               WRITE (6,*) 'NCOMB =', NCOMB
             ENDIF
           ENDIF
@@ -313,7 +307,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL,
 
       REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC)
 C
-C     Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23
+C     Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17
 C     By the MadGraph5_aMC@NLO Development Team
 C     Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch
 C
diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cluster.f b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cluster.f
index 649e46f4e9..b8995283ed 100644
--- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cluster.f
+++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/cluster.f
@@ -552,6 +552,8 @@ logical function cluster(p, ivec)
       if (btest(mlevel,1))
     $    write (*,*)'New event'
 
+      iwin = 0
+      jwin = 0
       cluster=.false.
       clustered=.false.
       do i=0,3
@@ -663,7 +665,8 @@ logical function cluster(p, ivec)
 c     initialize graph storage
       igraphs(0)=0
       nleft=nexternal
-c     cluster
+c     cluster
+      if (iwin.eq.0.or.jwin.eq.0) stop 21
       do n=1,nexternal-2
 c     combine winner
          imocl(n)=imap(iwin,2)+imap(jwin,2)
diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/proc_characteristics b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/proc_characteristics
index a607b77022..be5693ddfc 100644
--- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/proc_characteristics
+++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/proc_characteristics
@@ -17,6 +17,8 @@
     splitting_types = []
     perturbation_order = []
     limitations = []
+    ew_sudakov = False
     hel_recycling = False
     single_color = True
     nlo_mixed_expansion = True
+    gauge = unitary
diff --git a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/refine.sh b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/refine.sh
index afb9b99ad1..b46170ba23 100644
--- a/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/refine.sh
+++ b/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/refine.sh
@@ -57,7 +57,11 @@ j=%(directory)s
 for((try=1;try<=16;try+=1)); do
     if [ "$keeplog" = true ] ; then
-        ../madevent 2>&1 >> $k <input_app.txt
+        if [[ -e ../madevent ]];then
+            ../madevent 2>&1 >> $k <input_app.txt
+        else
+            ./madevent 2>&1 >> $k <input_app.txt
+        fi
     else
         ../madevent 2>&1 >> log.txt <input_app.txt
     fi
diff --git a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/banner.py b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/banner.py
--- a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/banner.py
+++ b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/banner.py
-        pat_begin=re.compile('<(?P<name>\w*)>')
-        pat_end=re.compile('</(?P<name>\w*)>')
+        pat_begin=re.compile(r'<(?P<name>\w*)>')
+        pat_end=re.compile(r'</(?P<name>\w*)>')
 
         tag_to_file={'slha':'param_card.dat',
             'mgruncard':'run_card.dat',
@@ -319,7 +319,7 @@ def check_pid(self, pid2label):
     def get_lha_strategy(self):
         """get the lha_strategy: how the weight have to be handle by the shower"""
 
-        if not self["init"]:
+        if "init" not in self or not self["init"]:
             raise Exception("No init block define")
 
         data = self["init"].split('\n')[0].split()
@@ -537,7 +537,8 @@ def charge_card(self, tag):
             self.param_card = param_card_reader.ParamCard(param_card)
             return self.param_card
         elif tag == 'mgruncard':
-            self.run_card = RunCard(self[tag], unknown_warning=False)
+            with misc.TMP_variable(RunCard, 'allow_scan', True):
+                self.run_card = RunCard(self[tag], consistency=False,
unknow_warning=False) return self.run_card elif tag == 'mg5proccard': proc_card = self[tag].split('\n') @@ -976,6 +977,8 @@ class ConfigFile(dict): """ a class for storing/dealing with input file. """ + allow_scan = False + def __init__(self, finput=None, **opt): """initialize a new instance. input can be an instance of MadLoopParam, a file, a path to a file, or simply Nothing""" @@ -993,6 +996,7 @@ def __init__(self, finput=None, **opt): # Initialize it with all the default value self.user_set = set() self.auto_set = set() + self.scan_set = {} #key -> type of list (int/float/bool/str/... for scan self.system_only = set() self.lower_to_case = {} self.list_parameter = {} #key -> type of list (int/float/bool/str/... @@ -1109,6 +1113,8 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): #1. check if the parameter is set to auto -> pass it to special if lower_name in self: targettype = type(dict.__getitem__(self, lower_name)) + if lower_name in self.scan_set: + targettype = self.scan_set[lower_name] if targettype != str and isinstance(value, str) and value.lower() == 'auto': self.auto_set.add(lower_name) if lower_name in self.user_set: @@ -1118,13 +1124,26 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): return elif lower_name in self.auto_set: self.auto_set.remove(lower_name) - + + + #1. check if the parameter is set to auto -> pass it to special + scan_targettype = None + if self.allow_scan and isinstance(value, str) and value.strip().startswith('scan'): + if lower_name in self.user_set: + self.user_set.remove(lower_name) + self.scan_set[lower_name] = type(self[lower_name]) + dict.__setitem__(self, lower_name, value) + #keep old value. + self.post_set(lower_name,value, change_userdefine, raiseerror) + return + elif lower_name in self.scan_set: + scan_targettype = self.scan_set[lower_name] + del self.scan_set[lower_name] + # 2. Find the type of the attribute that we want if lower_name in self.list_parameter: targettype = self.list_parameter[lower_name] - - if isinstance(value, str): # split for each comma/space value = value.strip() @@ -1248,8 +1267,11 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): if change_userdefine: self.user_set.add(lower_name) return self.post_set(lower_name, None, change_userdefine, raiseerror) - elif name in self: - targettype = type(self[name]) + elif name in self: + if scan_targettype: + targettype = targettype + else: + targettype = type(self[name]) else: logger.debug('Trying to add argument %s in %s. ' % (name, self.__class__.__name__) +\ 'This argument is not defined by default. 
Please consider adding it.') @@ -1262,7 +1284,7 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): if change_userdefine: self.user_set.add(lower_name) return self.post_set(lower_name, None, change_userdefine, raiseerror) - + value = self.format_variable(value, targettype, name=name) #check that the value is allowed: if lower_name in self.allowed_value and '*' not in self.allowed_value[lower_name]: @@ -1444,7 +1466,7 @@ def format_variable(value, targettype, name="unknown"): value =int(value[:-1]) * convert[value[-1]] elif '/' in value or '*' in value: try: - split = re.split('(\*|/)',value) + split = re.split(r'(\*|/)',value) v = float(split[0]) for i in range((len(split)//2)): if split[2*i+1] == '*': @@ -1482,7 +1504,7 @@ def format_variable(value, targettype, name="unknown"): value = float(value) except ValueError: try: - split = re.split('(\*|/)',value) + split = re.split(r'(\*|/)',value) v = float(split[0]) for i in range((len(split)//2)): if split[2*i+1] == '*': @@ -1491,7 +1513,10 @@ def format_variable(value, targettype, name="unknown"): v /= float(split[2*i+2]) except: v=0 - raise InvalidCmd("%s can not be mapped to a float" % value) + if "scan" in value: + raise InvalidCmd("%s is not supported here. Note that scan command can not be present simultaneously in the run_card and param_card." % value) + else: + raise InvalidCmd("%s can not be mapped to a float" % value) finally: value = v else: @@ -1737,10 +1762,12 @@ def default_setup(self): self.add_param('splitting_types',[], typelist=str) self.add_param('perturbation_order', [], typelist=str) self.add_param('limitations', [], typelist=str) + self.add_param('ew_sudakov', False) self.add_param('hel_recycling', False) self.add_param('single_color', True) self.add_param('nlo_mixed_expansion', True) - + self.add_param('gauge', 'U') + def read(self, finput): """Read the input file, this can be a path to a file, a file object, a str with the content of the file.""" @@ -3104,7 +3131,7 @@ def write(self, output_file, template=None, python_template=False, # do not write hidden parameter not hidden for this template # if python_template: - written = written.union(set(re.findall('\%\((\w*)\)s', open(template,'r').read(), re.M))) + written = written.union(set(re.findall(r'\%\((\w*)\)s', open(template,'r').read(), re.M))) to_write = to_write.union(set(self.hidden_param)) to_write = to_write.difference(written) @@ -3157,7 +3184,6 @@ def get_value_from_include(self, path, list_of_params, output_dir): if path does not exists return the current value in self for all parameter""" #WARNING DOES NOT HANDLE LIST/DICT so far - misc.sprint(output_dir, path) # handle case where file is missing if not os.path.exists(pjoin(output_dir,path)): misc.sprint("include file not existing", pjoin(output_dir,path)) @@ -3259,7 +3285,7 @@ def edit_dummy_fct_from_file(self, filelist, outdir): text = open(path,'r').read() #misc.sprint(text) f77_type = ['real*8', 'integer', 'double precision', 'logical'] - pattern = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ + pattern = re.compile(r'^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ % {'type':'|'.join(f77_type)}, re.I+re.M) for fct in pattern.findall(text): fsock = file_writers.FortranWriter(tmp,'w') @@ -3287,6 +3313,7 @@ def edit_dummy_fct_from_file(self, filelist, outdir): starttext = open(pjoin(outdir, path+'.orig')).read() fsock.remove_routine(starttext, to_mod[path][0]) for text in to_mod[path][1]: + text = self.retro_compatible_custom_fct(text) 
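+                # retro_compatible_custom_fct (defined below) rewrites user-supplied
+                # Fortran so that 'vector.inc', which defines VECSIZE_MEMMAX, is
+                # included ahead of any 'run.inc' include that now depends on it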
fsock.writelines(text) fsock.close() if not filecmp.cmp(pjoin(outdir, path), pjoin(outdir, path+'.tmp')): @@ -3303,7 +3330,33 @@ def edit_dummy_fct_from_file(self, filelist, outdir): files.mv(pjoin(outdir,path+'.orig'), pjoin(outdir, path)) + @staticmethod + def retro_compatible_custom_fct(lines, mode=None): + f77_type = ['real*8', 'integer', 'double precision', 'logical'] + function_pat = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ + % {'type':'|'.join(f77_type)}, re.I+re.M) + include_pat = re.compile(r"\s+include\s+[\'\"]([\w\./]*)") + + assert isinstance(lines, list) + sol = [] + + if mode is None or 'vector.inc' in mode: + search = True + for i,line in enumerate(lines[:]): + if search and re.search(include_pat, line): + name = re.findall(include_pat, line)[0] + misc.sprint('DETECTED INCLUDE', name) + if 'vector.inc' in name: + search = False + if 'run.inc' in name: + sol.append(" include 'vector.inc'") + search = False + sol.append(line) + if re.search(function_pat, line): + misc.sprint("DETECTED FCT") + search = True + return sol def guess_entry_fromname(self, name, value): """ @@ -3346,7 +3399,7 @@ def update_typelist(value, name, opts): #handle metadata opts = {} forced_opts = [] - for key,val in re.findall("\<(?P[_\-\w]+)\=(?P[^>]*)\>", str(name)): + for key,val in re.findall(r"\<(?P[_\-\w]+)\=(?P[^>]*)\>", str(name)): forced_opts.append(key) if val in ['True', 'False']: opts[key] = eval(val) @@ -3681,11 +3734,22 @@ def write_autodef(self, output_dir, output_file=None): out = ["%s\n" %l for l in out] fsock.writelines(out) - @staticmethod - def get_idbmup(lpp): + def get_idbmup(self, lpp, beam=1): """return the particle colliding pdg code""" if lpp in (1,2, -1,-2): - return math.copysign(2212, lpp) + target = 2212 + if 'nb_proton1' in self: + nbp = self['nb_proton%s' % beam] + nbn = self['nb_neutron%s' % beam] + if nbp == 1 and nbn ==0: + target = 2212 + elif nbp==0 and nbn ==1: + target = 2112 + else: + target = 1000000000 + target += 10 * (nbp+nbn) + target += 10000 * nbp + return math.copysign(target, lpp) elif lpp in (3,-3): return math.copysign(11, lpp) elif lpp in (4,-4): @@ -3701,8 +3765,8 @@ def get_banner_init_information(self): the first line of the block of the lhe file.""" output = {} - output["idbmup1"] = self.get_idbmup(self['lpp1']) - output["idbmup2"] = self.get_idbmup(self['lpp2']) + output["idbmup1"] = self.get_idbmup(self['lpp1'], beam=1) + output["idbmup2"] = self.get_idbmup(self['lpp2'], beam=2) output["ebmup1"] = self["ebeam1"] output["ebmup2"] = self["ebeam2"] output["pdfgup1"] = 0 @@ -3959,7 +4023,8 @@ def check_validity(self, card): dict.__setitem__(card, 'pdlabel1', card['pdlabel']) dict.__setitem__(card, 'pdlabel2', card['pdlabel']) - if abs(card['lpp1']) == 1 == abs(card['lpp2']) and card['pdlabel1'] != card['pdlabel2']: + if isinstance(card['lpp1'],int) and isinstance(card['lpp2'],int) and \ + abs(card['lpp1']) == 1 == abs(card['lpp2']) and card['pdlabel1'] != card['pdlabel2']: raise InvalidRunCard("Assymetric beam pdf not supported for proton-proton collision") def status(self, card): @@ -4156,12 +4221,16 @@ def default_setup(self): self.add_param('frame_id', 6, system=True) self.add_param("event_norm", "average", allowed=['sum','average', 'unity'], include=False, sys_default='sum', hidden=True) + self.add_param("keep_log", "normal", include=False, hidden=True, + comment="none: all log send to /dev/null.\n minimal: keep only log for survey of the last run.\n normal: keep only log for survey of all run. 
\n debug: keep all log (survey and refine)", + allowed=['none', 'minimal', 'normal', 'debug']) #cut self.add_param("auto_ptj_mjj", True, hidden=True) self.add_param("bwcutoff", 15.0) self.add_param("cut_decays", False, cut='d') self.add_param('dsqrt_shat',0., cut=True) self.add_param("nhel", 0, include=False) + self.add_param("limhel", 1e-8, hidden=True, comment="threshold to determine if an helicity contributes when not MC over helicity.") #pt cut self.add_param("ptj", 20.0, cut='j') self.add_param("ptb", 0.0, cut='b') @@ -4842,7 +4911,7 @@ def create_default_for_process(self, proc_characteristic, history, proc_def): # here pick strategy 2 if only one QCD color flow # and for pure multi-jet case jet_id = [21] + list(range(1, self['maxjetflavor']+1)) - if proc_characteristic['single_color']: + if proc_characteristic['gauge'] != 'FD' and proc_characteristic['single_color']: self['sde_strategy'] = 2 #for pure lepton final state go back to sde_strategy=1 pure_lepton=True @@ -5741,9 +5810,10 @@ def check_validity(self): # check that ebeam is bigger than the proton mass. for i in [1,2]: - if self['lpp%s' % i ] not in [1,2]: + # do not for proton mass if not proton PDF (or when scan initialization) + if self['lpp%s' % i ] not in [1,2] or isinstance(self['ebeam%i' % i], str): continue - + if self['ebeam%i' % i] < 0.938: if self['ebeam%i' %i] == 0: logger.warning("At-rest proton mode set: energy beam set to 0.938 GeV") @@ -6193,3 +6263,181 @@ def log_and_update(self, banner, card, par, v): xcard = banner.charge_card(card) xcard[par[0]].param_dict[(par[1],)].value = v xcard.write(os.path.join(self.me_dir, 'Cards', '%s.dat' % card)) + + + + +class RunCardIterator(object): + """A class keeping track of the scan: flag in the param_card and + having an __iter__() function to scan over all the points of the scan. + """ + + logging = True + def __init__(self, input_path=None): + with misc.TMP_variable(RunCard, 'allow_scan', True): + self.run_card = RunCard(input_path, consistency=False) + self.run_card.allow_scan = True + + self.itertag = [] #all the current value use + self.cross = [] # keep track of all the cross-section computed + self.param_order = [] + + def __iter__(self): + """generate the next param_card (in a abstract way) related to the scan. + Technically this generates only the generator.""" + + if hasattr(self, 'iterator'): + return self.iterator + self.iterator = self.iterate() + return self.iterator + + def write(self, path): + self.__iter__.write(path) + + def next(self, autostart=False): + """call the next iteration value""" + try: + iterator = self.iterator + except: + if autostart: + iterator = self.__iter__() + else: + raise + try: + out = next(iterator) + except StopIteration: + del self.iterator + raise + return out + + def iterate(self): + """create the actual generator""" + all_iterators = {} # dictionary of key -> block of object to scan [([param, [values]), ...] 
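+        # accepted syntax is e.g. "scan:[10,20,30]" or "scan1:[0.1,0.2]"; the
+        # optional integer tag groups entries so that parameters sharing a tag
+        # advance in lockstep, while distinct tags are combined as a cartesian
+        # product by itertools.product below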
+        pattern = re.compile(r'''scan\s*(?P<id>\d*)\s*:\s*(?P<value>[^#]*)''', re.I)
+
+        # fill all_iterators with the run_card information
+        for name in self.run_card.scan_set:
+            value = self.run_card[name]
+            try:
+                key, def_list = pattern.findall(value)[0]
+            except Exception as error:
+                misc.sprint(error)
+                raise Exception("Fail to handle scanning tag in run_card: Please check that the syntax is valid")
+            if key == '':
+                key = -1 * len(all_iterators)
+            if key not in all_iterators:
+                all_iterators[key] = []
+            try:
+                all_iterators[key].append( (name, eval(def_list)))
+            except SyntaxError as error:
+                raise Exception("Fail to handle your scan definition. Please check your syntax:\n entry: %s \n Error reported: %s" %(def_list, error))
+
+        #prepare to keep track of parameter changing for the report
+        keys = list(all_iterators.keys()) # need to fix an order for the scan
+        #store the type of parameter
+        for key in keys:
+            for param, values in all_iterators[key]:
+                self.param_order.append("run_card#%s" % (param))
+
+        # do the loop
+        lengths = [list(range(len(all_iterators[key][0][1]))) for key in keys]
+        from functools import reduce
+        total = reduce((lambda x, y: x * y),[len(x) for x in lengths])
+        for i,positions in enumerate(itertools.product(*lengths)):
+            self.itertag = []
+            if self.logging:
+                logger.info("Create the next run_card in the scan definition (%s/%s) " %( i+1, total), '$MG:BOLD')
+            for i, pos in enumerate(positions):
+                key = keys[i]
+                for param, values in all_iterators[key]:
+                    # assign the value in the card.
+                    self.run_card[param] = values[pos]
+                    self.itertag.append(values[pos])
+                    if self.logging:
+                        logger.info("change parameter %s to %s", \
+                                    param, values[pos])
+
+            # return the current run_card up to next iteration
+            yield self.run_card
+
+
+    def store_entry(self, run_name, cross, error=None, run_card_path=None):
+        """store the value of the cross-section"""
+
+        if isinstance(cross, dict):
+            info = dict(cross)
+            info.update({'bench' : self.itertag, 'run_name': run_name})
+            self.cross.append(info)
+        else:
+            if error is None:
+                self.cross.append({'bench' : self.itertag, 'run_name': run_name, 'cross(pb)':cross})
+            else:
+                self.cross.append({'bench' : self.itertag, 'run_name': run_name, 'cross(pb)':cross, 'error(pb)':error})
+
+
+    def write_summary(self, path, order=None, lastline=False, nbcol=20):
+        """ """
+
+        if path:
+            ff = open(path, 'w')
+            path_events = path.rsplit("/", 1)[0]
+            #identCard = open(pjoin(path.rsplit("/", 2)[0], "Cards", "ident_card.dat"))
+            #identLines = identCard.readlines()
+            #identCard.close()
+        else:
+            ff = StringIO.StringIO()
+        if order:
+            keys = order
+        else:
+            keys = list(self.cross[0].keys())
+            if 'bench' in keys: keys.remove('bench')
+            if 'run_name' in keys: keys.remove('run_name')
+            keys.sort()
+            if 'cross(pb)' in keys:
+                keys.remove('cross(pb)')
+                keys.append('cross(pb)')
+            if 'error(pb)' in keys:
+                keys.remove('error(pb)')
+                keys.append('error(pb)')
+
+        formatting = "#%s%s%s\n" %('%%-%is ' % (nbcol-1), ('%%-%is ' % (nbcol))* len(self.param_order),
+                                   ('%%-%is ' % (nbcol))* len(keys))
+        # header
+        if not lastline:
+            ff.write(formatting % tuple(['run_name'] + self.param_order + keys))
+        formatting = "%s%s%s\n" %('%%-%is ' % (nbcol), ('%%-%ie ' % (nbcol))* len(self.param_order),
+                                  ('%%-%ie ' % (nbcol))* len(keys))
+
+        if not lastline:
+            to_print = self.cross
+        else:
+            to_print = self.cross[-1:]
+        for info in to_print:
+            name = info['run_name']
+            bench = info['bench']
+            data = []
+            for k in keys:
+                if k in info:
+                    data.append(info[k])
+                else:
+                    data.append(0.)
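+            # entries absent for a given run are padded with 0. above so every
+            # row keeps the fixed-width column layout defined by the header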
+ ff.write(formatting % tuple([name] + bench + data)) + ff_single = open(pjoin(path_events, name, "params.dat"), "w") + for i_bench in range(0, len(bench)): + ff_single.write(self.param_order[i_bench] + " = " + str(bench[i_bench]) +"\n") + ff_single.close() + + if not path: + return ff.getvalue() + + + def get_next_name(self, run_name): + """returns a smart name for the next run""" + + if '_' in run_name: + name, value = run_name.rsplit('_',1) + if value.isdigit(): + return '%s_%02i' % (name, float(value)+1) + # no valid '_' in the name + return '%s_scan_02' % run_name diff --git a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/check_param_card.py b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/check_param_card.py index 71089d7480..bc785b5de6 100755 --- a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/check_param_card.py +++ b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/check_param_card.py @@ -649,7 +649,7 @@ def write_inc_file(self, outpath, identpath, default, need_mp=False): #check if we need to write the value of scale for some block if os.path.exists(input_inc): text = open(input_inc).read() - scales = list(set(re.findall('mdl__(\w*)__scale', text, re.I))) + scales = list(set(re.findall(r'mdl__(\w*)__scale', text, re.I))) else: scales = [] @@ -1000,10 +1000,12 @@ def iterate(self): self.param_order.append("%s#%s" % (param.lhablock, '_'.join(repr(i) for i in param.lhacode))) # do the loop lengths = [list(range(len(all_iterators[key][0][1]))) for key in keys] - for positions in itertools.product(*lengths): + from functools import reduce + total = reduce((lambda x, y: x * y),[len(x) for x in lengths]) + for i,positions in enumerate(itertools.product(*lengths)): self.itertag = [] if self.logging: - logger.info("Create the next param_card in the scan definition", '$MG:BOLD') + logger.info("Create the next param_card in the scan definition (%s/%s)" % (i+1,total), '$MG:BOLD') for i, pos in enumerate(positions): key = keys[i] for param, values in all_iterators[key]: diff --git a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/cluster.py b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/cluster.py index 9a893f630d..1ad860e04f 100755 --- a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/cluster.py +++ b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/cluster.py @@ -646,7 +646,10 @@ def worker(self): if os.path.exists(exe) and not exe.startswith('/'): exe = './' + exe if isinstance(opt['stdout'],str): - opt['stdout'] = open(opt['stdout'],'w') + if opt['stdout'] == '/dev/null': + opt['stdout'] = os.open(os.devnull, os.O_RDWR) + else: + opt['stdout'] = open(opt['stdout'],'w') if opt['stderr'] == None: opt['stderr'] = subprocess.STDOUT if arg: @@ -671,11 +674,12 @@ def worker(self): self.pids.put(pid) # the function should return 0 if everything is fine # the error message otherwise - returncode = exe(*arg, **opt) - if returncode != 0: - logger.warning("fct %s does not return 0. Stopping the code in a clean way. The error was:\n%s", exe, returncode) + try: + returncode = exe(*arg, **opt) + except Exception as error: + #logger.warning("fct %s does not return 0. Stopping the code in a clean way. 
The error was:\n%s", exe, returncode) self.stoprequest.set() - self.remove("fct %s does not return 0:\n %s" % (exe, returncode)) + self.remove("fct %s does raise %s\n %s" % (exe, error)) except Exception as error: self.fail_msg = sys.exc_info() logger.warning(str(error)) @@ -700,7 +704,7 @@ def worker(self): def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, - log=None, required_output=[], nb_submit=0): + log=None, required_output=[], nb_submit=0, python_opts={}): """submit a job on multicore machine""" # open threads if needed @@ -720,7 +724,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, return tag else: # python function - self.queue.put((tag, prog, argument, {})) + self.queue.put((tag, prog, argument, python_opts)) self.submitted.put(1) return tag @@ -908,6 +912,10 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None else: requirement = '' + if 'cluster_walltime' in self.options and self.options['cluster_walltime']\ + and self.options['cluster_walltime'] != 'None': + requirement+='\n MaxRuntime = %s' % self.options['cluster_walltime'] + if cwd is None: cwd = os.getcwd() if stdout is None: @@ -936,7 +944,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None #Submitting job(s). #Logging submit event(s). #1 job(s) submitted to cluster 2253622. - pat = re.compile("submitted to cluster (\d*)",re.MULTILINE) + pat = re.compile(r"submitted to cluster (\d*)",re.MULTILINE) output = output.decode(errors='ignore') try: id = pat.search(output).groups()[0] @@ -1025,7 +1033,7 @@ def submit2(self, prog, argument=[], cwd=None, stdout=None, stderr=None, #Logging submit event(s). #1 job(s) submitted to cluster 2253622. output = output.decode(errors='ignore') - pat = re.compile("submitted to cluster (\d*)",re.MULTILINE) + pat = re.compile(r"submitted to cluster (\d*)",re.MULTILINE) try: id = pat.search(output).groups()[0] except: @@ -1588,7 +1596,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None output = a.communicate()[0].decode(errors='ignore') #Your job 874511 ("test.sh") has been submitted - pat = re.compile("Your job (\d*) \(",re.MULTILINE) + pat = re.compile(r"Your job (\d*) \(",re.MULTILINE) try: id = pat.search(output).groups()[0] except: @@ -1606,7 +1614,7 @@ def control_one_job(self, id): if not status: return 'F' #874516 0.00000 test.sh alwall qw 03/04/2012 22:30:35 1 - pat = re.compile("^(\d+)\s+[\d\.]+\s+[\w\d\.]+\s+[\w\d\.]+\s+(\w+)\s") + pat = re.compile(r"^(\d+)\s+[\d\.]+\s+[\w\d\.]+\s+[\w\d\.]+\s+(\w+)\s") stat = '' for line in status.stdout.read().decode(errors='ignore').split('\n'): if not line: @@ -1636,7 +1644,7 @@ def control(self, me_dir=None): cmd = 'qstat -s %s' % statusflag status = misc.Popen([cmd], shell=True, stdout=subprocess.PIPE) #874516 0.00000 test.sh alwall qw 03/04/2012 22:30:35 1 - pat = re.compile("^(\d+)") + pat = re.compile(r"^(\d+)") for line in status.stdout.read().decode(errors='ignore').split('\n'): line = line.strip() try: @@ -1715,6 +1723,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None stderr = stdout if log is None: log = '/dev/null' + command = ['sbatch', '-o', stdout, '-J', me_dir, @@ -1726,6 +1735,12 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None command.insert(1, '-p') command.insert(2, self.cluster_queue) + if 'cluster_walltime' in self.options and self.options['cluster_walltime']\ + and self.options['cluster_walltime'] != 'None': + 
command.insert(1, '-t') + command.insert(2, self.options['cluster_walltime']) + + a = misc.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, @@ -1736,7 +1751,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None id = output_arr[3].rstrip() if not id.isdigit(): - id = re.findall('Submitted batch job ([\d\.]+)', ' '.join(output_arr)) + id = re.findall(r'Submitted batch job ([\d\.]+)', ' '.join(output_arr)) if not id or len(id)>1: raise ClusterManagmentError( 'fail to submit to the cluster: \n%s' \ diff --git a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/combine_runs.py b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/combine_runs.py index 4de6b84ec0..b1e8c88eac 100755 --- a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/combine_runs.py +++ b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/combine_runs.py @@ -20,6 +20,7 @@ from __future__ import absolute_import import math import os +import shutil import re import logging from six.moves import range @@ -117,6 +118,7 @@ def sum_multichannel(self, channel): #Now read in all of the events and write them #back out with the appropriate scaled weight + to_clean = [] fsock = open(pjoin(channel, 'events.lhe'), 'w') wgt = results.axsec / results.nunwgt tot_nevents, nb_file = 0, 0 @@ -129,8 +131,14 @@ def sum_multichannel(self, channel): nw = self.copy_events(fsock, pjoin(path,'events.lhe'), wgt) tot_nevents += nw nb_file += 1 + to_clean.append(path) logger.debug("Combined %s file generating %s events for %s " , nb_file, tot_nevents, channel) - + for path in to_clean: + try: + shutil.rmtree(path) + except Exception as error: + pass + @staticmethod def get_fortran_str(nb): data = '%E' % nb @@ -162,6 +170,7 @@ def copy_events(self, fsock, input, new_wgt): fsock.write(line) old_line = line return nb_evt + def get_channels(self, proc_path): """Opens file symfact.dat to determine all channels""" sympath = os.path.join(proc_path, 'symfact.dat') diff --git a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/common_run_interface.py index 9bd9d9cb50..194f0cdfbd 100755 --- a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/common_run_interface.py @@ -23,6 +23,7 @@ import ast import logging import math +import copy import os import re import shutil @@ -181,6 +182,23 @@ def help_add_time_of_flight(self): logger.info(' threshold option allows to change the minimal value required to') logger.info(' a non zero value for the particle (default:1e-12s)') + def help_print_results(self): + logger.info("syntax: print_results [RUN_NAME] [OPTIONS]") + logger.info("-- print the results of the previous run on the screen") + logger.info(" If not RUN_NAME is provided, the information of all run") + logger.info(" are printed one after another.") + logger.info("") + logger.info(" supported options:") + logger.info(" ------------------") + logger.info(" --format=full|short # default is full") + logger.info(" full format contains banner/... 
") + logger.info(" while short is a simple multi-column format (nice for plotting)") + logger.info(" --path=") + logger.info(" allow to write the information to a file.") + logger.info(" --mode=w|a #default is w ") + logger.info(" when the information is printed to a file, you can choose ") + logger.info(" to either overwrite the file if already exists (w mode)") + logger.info(" to append the information at the end of the file (a mode)") class CheckValidForCmd(object): @@ -727,7 +745,7 @@ def __init__(self, me_dir, options, *args, **opts): if not self.proc_characteristics['ninitial']: # Get number of initial states nexternal = open(pjoin(self.me_dir,'Source','nexternal.inc')).read() - found = re.search("PARAMETER\s*\(NINCOMING=(\d)\)", nexternal) + found = re.search(r"PARAMETER\s*\(NINCOMING=(\d)\)", nexternal) self.ninitial = int(found.group(1)) else: self.ninitial = self.proc_characteristics['ninitial'] @@ -989,7 +1007,22 @@ def do_treatcards(self, line, amcatnlo=False): #raise Exception, "%s %s %s" % (sys.path, os.path.exists(pjoin(self.me_dir,'bin','internal', 'ufomodel')), os.listdir(pjoin(self.me_dir,'bin','internal', 'ufomodel'))) import ufomodel as ufomodel zero = ufomodel.parameters.ZERO - if self.proc_characteristics['nlo_mixed_expansion']: + no_width = [] + + if self.proc_characteristics['ew_sudakov']: + # if the sudakov approximation is used, force all particle widths to zero + # unless the complex mass scheme is used + if not self.proc_characteristics['complex_mass_scheme']: + no_width = [p for p in ufomodel.all_particles if p.width != zero] + logger.info('''Setting all particle widths to zero (needed for EW Sudakov approximation).''','$MG:BOLD') + # also, check that the model features the 'ntadpole' parameter, and set it to 1 + try: + param_card['tadpole'].get(1).value = 1. + logger.info('''Setting the value of ntadpole to 1 (needed for EW Sudakov approximation).''','$MG:BOLD') + except KeyError: + logger.warning('''The model has no 'ntadpole' parameter. 
The Sudakov approximation for EW corrections may give wrong results.''') + + elif self.proc_characteristics['nlo_mixed_expansion']: no_width = [p for p in ufomodel.all_particles if (str(p.pdg_code) in pids or str(-p.pdg_code) in pids) and p.width != zero] @@ -1168,17 +1201,17 @@ def detect_card_type(path): 'Begin Minpts', 'gridpack', 'ebeam1', - 'block\s+mw_run', + r'block\s+mw_run', 'BLOCK', 'DECAY', 'launch', 'madspin', - 'transfer_card\.dat', + r'transfer_card\.dat', 'set', 'main:numberofevents', # pythia8, '@MG5aMC skip_analysis', #MA5 --both-- - '@MG5aMC\s*inputs\s*=\s*\*\.(?:hepmc|lhe)', #MA5 --both-- - '@MG5aMC\s*reconstruction_name', # MA5 hadronique + r'@MG5aMC\s*inputs\s*=\s*\*\.(?:hepmc|lhe)', #MA5 --both-- + r'@MG5aMC\s*reconstruction_name', # MA5 hadronique '@MG5aMC', # MA5 hadronique 'run_rivet_later', # Rivet ] @@ -1237,7 +1270,7 @@ def detect_card_type(path): return 'madspin_card.dat' if 'decay' in text: # need to check if this a line like "decay w+" or "set decay" - if re.search("(^|;)\s*decay", fulltext, re.M): + if re.search(r"(^|;)\s*decay", fulltext, re.M): return 'madspin_card.dat' else: return 'reweight_card.dat' @@ -2074,6 +2107,12 @@ def check_multicore(self): # ensure that the run_card is present if not hasattr(self, 'run_card'): self.run_card = banner_mod.RunCard(pjoin(self.me_dir, 'Cards', 'run_card.dat')) + # Below reads the run_card in the LHE file rather than the Cards/run_card + import madgraph.various.lhe_parser as lhe_parser + args_path = list(args) + self.check_decay_events(args_path) + self.run_card = banner_mod.RunCard(lhe_parser.EventFile(args_path[0]).get_banner()['mgruncard']) + # we want to run this in a separate shell to avoid hard f2py crash command = [sys.executable] @@ -2085,6 +2124,12 @@ def check_multicore(self): command.append('--web') command.append('reweight') + ## TV: copy the event file as backup before starting reweighting + event_path = pjoin(self.me_dir, 'Events', self.run_name, 'events.lhe.gz') + event_path_backup = pjoin(self.me_dir, 'Events', self.run_name, 'events_orig.lhe.gz') + if os.path.exists(event_path) and not os.path.exists(event_path_backup): + shutil.copyfile(event_path, event_path_backup) + ######### START SINGLE CORE MODE ############ if self.options['nb_core']==1 or self.run_card['nevents'] < 101 or not check_multicore(self): if self.run_name: @@ -2200,7 +2245,7 @@ def check_multicore(self): cross_sections[key] = value / (nb_event+1) lhe.remove() for key in cross_sections: - if key == 'orig' or key.isdigit(): + if key == 'orig' or (key.isdigit() and not (key[0] == '2')): continue logger.info('%s : %s pb' % (key, cross_sections[key])) return @@ -2461,7 +2506,7 @@ def do_add_time_of_flight(self, line): ############################################################################ def do_print_results(self, line): - """Not in help:Print the cross-section/ number of events for a given run""" + """Print the cross-section/ number of events for a given run""" args = self.split_arg(line) options={'path':None, 'mode':'w', 'format':'full'} @@ -2942,10 +2987,15 @@ def do_rivet(self, line, postprocess=False): #2 Prepare Rivet setup environments rivet_path = self.options['rivet_path'] yoda_path = self.options['yoda_path'] + fastjet_path = subprocess.Popen([self.options['fastjet'], '--prefix'], + stdout = subprocess.PIPE).stdout.read().decode(errors='ignore').strip() + set_env = set_env + "export PATH={0}:$PATH\n".format(pjoin(rivet_path, 'bin')) set_env = set_env + "export PATH={0}:$PATH\n".format(pjoin(yoda_path, 'bin')) set_env = 
set_env + "export LD_LIBRARY_PATH={0}:{1}:$LD_LIBRARY_PATH\n".format(pjoin(rivet_path, 'lib'), pjoin(rivet_path, 'lib64')) set_env = set_env + "export LD_LIBRARY_PATH={0}:{1}:$LD_LIBRARY_PATH\n".format(pjoin(yoda_path, 'lib'), pjoin(yoda_path, 'lib64')) + set_env = set_env + "export LD_LIBRARY_PATH={0}:$LD_LIBRARY_PATH\n".format(pjoin(self.options['hepmc_path'], 'lib')) + set_env = set_env + "export LD_LIBRARY_PATH={0}:$LD_LIBRARY_PATH\n".format(pjoin(fastjet_path, 'lib')) major, minor = sys.version_info[0:2] set_env = set_env + "export PYTHONPATH={0}:{1}:$PYTHONPATH\n".format(pjoin(rivet_path, 'lib', 'python%s.%s' %(major,minor), 'site-packages'),\ pjoin(rivet_path, 'lib64', 'python%s.%s' %(major,minor), 'site-packages')) @@ -4311,8 +4361,8 @@ def complete_compute_widths(self, text, line, begidx, endidx, formatting=True): else: completion = {} completion['options'] = self.list_completion(text, - ['--path=', '--output=', '--min_br=0.\$', '--nlo', - '--precision_channel=0.\$', '--body_decay=']) + ['--path=', '--output=', r'--min_br=0.\$', '--nlo', + r'--precision_channel=0.\$', '--body_decay=']) return self.deal_multiple_categories(completion, formatting) @@ -4821,9 +4871,9 @@ class AskforEditCard(cmd.OneLinePathCompletion): (return False to repeat the question) """ - all_card_name = ['param_card', 'run_card', 'pythia_card', 'pythia8_card', + all_card_name = ['param_card', 'run_card', 'pythia_card', 'pythia8_card', 'fo_analysis_card' 'madweight_card', 'MadLoopParams', 'shower_card', 'rivet_card'] - to_init_card = ['param', 'run', 'madweight', 'madloop', + to_init_card = ['param', 'run', 'madweight', 'madloop', 'fo_analysis', 'shower', 'pythia8','delphes','madspin', 'rivet'] special_shortcut = {} special_shortcut_help = {} @@ -4853,6 +4903,7 @@ def load_default(self): self.has_PY8 = False self.has_delphes = False self.has_rivet = False + self.has_fo_card = False self.paths = {} self.update_block = [] @@ -5058,7 +5109,8 @@ def init_run(self, cards): try: - self.run_card = banner_mod.RunCard(self.paths['run'], consistency='warning') + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + self.run_card = banner_mod.RunCard(self.paths['run'], consistency='warning') except IOError: self.run_card = {} try: @@ -5248,6 +5300,15 @@ def init_delphes(self, cards): self.has_delphes = True return [] + def init_fo_analysis(self, cards): + self.has_fo_card = False + if not self.get_path('FO_analyse', cards): + return [] + self.has_fo_card = True + self.fo_card = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse']) + self.fo_card_def = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse_default']) + return list(self.fo_card.string_vars) + def set_CM_velocity(self, line): """compute sqrts from the velocity in the center of mass frame""" @@ -5508,6 +5569,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed['delphes_card'] = '' if self.has_rivet: allowed['rivet_card'] = '' + if self.has_fo_card: + allowed['fo_card'] = '' elif len(args) == 2: if args[1] == 'run_card': @@ -5532,6 +5595,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed = {'delphes_card':'default'} elif args[1] == 'rivet_card': allowed = {'rivet_card':'default'} + elif args[1] == 'fo_card': + allowed = {'fo_card':'default'} else: allowed = {'value':''} @@ -5539,6 +5604,7 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): start = 1 if args[1] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', 
'MadLoop_card','pythia8_card','delphes_card','plot_card', + 'fo_card', 'madanalysis5_parton_card','madanalysis5_hadron_card', 'rivet_card']: start = 2 @@ -5576,6 +5642,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): categories.append('delphes_card') if self.has_rivet: categories.append('rivet_card') + if self.has_fo_card: + categories.append('fo_card') possibilities['category of parameter (optional)'] = \ self.list_completion(text, categories) @@ -5630,7 +5698,13 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): if 'delphes_card' in allowed: if allowed['delphes_card'] == 'default': opts = ['default', 'atlas', 'cms'] - possibilities['Delphes Card'] = self.list_completion(text, opts) + possibilities['Delphes Card'] = self.list_completion(text, opts) + + if 'fo_card' in allowed: + opts = self.fo_card.string_vars + if allowed['fo_card'] == 'default': + opts.append('default') + possibilities['FO Card'] = self.list_completion(text, opts) if 'value' in list(allowed.keys()): opts = ['default', 'scale'] @@ -5733,21 +5807,28 @@ def do_set(self, line): if args[0] in self.special_shortcut: targettypes , cmd = self.special_shortcut[args[0]] if len(args) != len(targettypes) +1: - logger.warning('shortcut %s requires %s argument' % (args[0], len(targettypes))) - if len(args) < len(targettypes) +1: - return + if len(targettypes) == 1 and args[len(targettypes)].startswith('scan'): + args = args[:len(targettypes)] + [' '.join(args[len(targettypes):])] + targettypes = [str] else: - logger.warning('additional argument will be ignored') + logger.warning('shortcut %s requires %s argument' % (args[0], len(targettypes))) + if len(args) < len(targettypes) +1: + return + else: + logger.warning('additional argument will be ignored') values ={} for i, argtype in enumerate(targettypes): try: - values = {str(i): banner_mod.ConfigFile.format_variable(args[i+1], argtype, args[0])} + values[str(i)] = banner_mod.ConfigFile.format_variable(args[i+1], argtype, args[0]) except ValueError as e: logger.warning("Wrong argument: The entry #%s should be of type %s.", i+1, argtype) return except InvalidCmd as e: - logger.warning(str(e)) - return + if isinstance(args[i+1], str) and args[i+1].startswith('scan'): + values[str(i)] = args[i+1] + else: + logger.warning(str(e)) + return #else: # logger.warning("too many argument for this command") # return @@ -5787,7 +5868,7 @@ def do_set(self, line): if os.path.exists(pythia_path): logger.info('add line QCUT = %s in pythia_card.dat' % args[1]) p_card = open(pythia_path,'r').read() - p_card, n = re.subn('''^\s*QCUT\s*=\s*[\de\+\-\.]*\s*$''', + p_card, n = re.subn(r'''^\s*QCUT\s*=\s*[\de\+\-\.]*\s*$''', ''' QCUT = %s ''' % args[1], \ p_card, flags=(re.M+re.I)) if n==0: @@ -5801,7 +5882,7 @@ def do_set(self, line): if os.path.exists(pythia_path): logger.info('add line SHOWERKT = %s in pythia_card.dat' % args[1].upper()) p_card = open(pythia_path,'r').read() - p_card, n = re.subn('''^\s*SHOWERKT\s*=\s*[default\de\+\-\.]*\s*$''', + p_card, n = re.subn(r'''^\s*SHOWERKT\s*=\s*[default\de\+\-\.]*\s*$''', ''' SHOWERKT = %s ''' % args[1].upper(), \ p_card, flags=(re.M+re.I)) if n==0: @@ -5856,7 +5937,7 @@ def do_set(self, line): pjoin(self.me_dir,'Cards', 'delphes_card.dat')) return - if args[0] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', + if args[0] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', 'fo_card', 'delphes_card','madanalysis5_hadron_card','madanalysis5_parton_card','rivet_card']: if args[1] == 'default': 
@@ -6176,6 +6257,22 @@ def do_set(self, line): self.setRivet(args[start], value, default=default) self.rivet_card.write(self.paths['rivet'], self.paths['rivet_default']) + elif self.has_fo_card and (card in ['', 'fo_card'])\ + and args[start].lower() in [k.lower() for k in self.fo_card.string_vars]: + + if args[start] in self.conflict and card == '': + text = 'ambiguous name (present in more than one card). Please specify which card to edit' + logger.warning(text) + return + if args[start+1] == 'default': + value = self.fo_card_default[args[start]] + default = True + else: + value = args[start+1] + default = False + self.fo_card[args[start]] = value + self.modified_card.add('fo_card') + #INVALID -------------------------------------------------------------- else: logger.warning('invalid set command %s ' % line) @@ -6222,12 +6319,13 @@ def setM(self, block, name, value): def setR(self, name, value): - if self.mother_interface.inputfile: - self.run_card.set(name, value, user=True, raiseerror=True) - else: - self.run_card.set(name, value, user=True) - new_value = self.run_card.get(name) - logger.info('modify parameter %s of the run_card.dat to %s' % (name, new_value),'$MG:BOLD') + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + if self.mother_interface.inputfile: + self.run_card.set(name, value, user=True, raiseerror=True) + else: + self.run_card.set(name, value, user=True) + new_value = self.run_card.get(name) + logger.info('modify parameter %s of the run_card.dat to %s' % (name, new_value),'$MG:BOLD') def setML(self, name, value, default=False): @@ -6314,6 +6412,7 @@ def check_card_consistency(self): proc_charac = self.mother_interface.proc_characteristics if proc_charac['grouped_matrix'] and \ + isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int) and \ abs(self.run_card['lpp1']) == 1 == abs(self.run_card['lpp2']) and \ (self.run_card['nb_proton1'] != self.run_card['nb_proton2'] or self.run_card['nb_neutron1'] != self.run_card['nb_neutron2'] or @@ -6403,41 +6502,42 @@ def check_card_consistency(self): # check that only quark/gluon/photon are in initial beam if lpp=+-1 pdg_in_p = list(range(-6,7))+[21,22] - if (abs(self.run_card['lpp1'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial1'])) \ + if isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int): + if(abs(self.run_card['lpp1'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial1'])) \ or (abs(self.run_card['lpp2'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial2'])): - if 'pythia_card.dat' in self.cards or 'pythia8_card.dat' in self.cards: - path_to_remove = None - if 'pythia_card.dat' in self.cards: - path_to_remove = self.paths['pythia'] - card_to_remove = 'pythia_card.dat' - elif 'pythia8_card.dat' in self.cards: - path_to_remove = self.paths['pythia8'] - card_to_remove = 'pythia8_card.dat' - if path_to_remove: - if 'partonshower' in self.run_card['bypass_check']: + if 'pythia_card.dat' in self.cards or 'pythia8_card.dat' in self.cards: + path_to_remove = None + if 'pythia_card.dat' in self.cards: + path_to_remove = self.paths['pythia'] + card_to_remove = 'pythia_card.dat' + elif 'pythia8_card.dat' in self.cards: + path_to_remove = self.paths['pythia8'] + card_to_remove = 'pythia8_card.dat' + if path_to_remove: + if 'partonshower' in self.run_card['bypass_check']: + logger.warning("forcing to keep parton-shower run while possibly not fully consistent... 
please be carefull") + else: + logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.') + os.remove(path_to_remove) + self.cards.remove(card_to_remove) + else: + logger.info('Remember that Parton-Shower are not yet ready for such proton component definition (HW implementation in progress).', '$MG:BOLD' ) + elif (abs(self.run_card['lpp1'])==3 and abs(self.run_card['lpp2'])==3): + if 'pythia8_card.dat' in self.cards: + if self.run_card['pdlabel'] == 'isronlyll': + if 'partonshower' not in self.run_card['bypass_check']: + # force that QED shower is on? + for param in ['TimeShower:QEDshowerByQ', 'TimeShower:QEDshowerByL', 'TimeShower:QEDshowerByGamma', 'SpaceShower:QEDshowerByQ', 'SpaceShower:QEDshowerByL']: + if param not in self.PY8Card or \ + (not self.PY8Card[param] and param.lower() not in self.PY8Card.user_set): + logger.warning('Activating QED shower: setting %s to True', param) + self.PY8Card[param] = True + elif 'partonshower' in self.run_card['bypass_check']: logger.warning("forcing to keep parton-shower run while possibly not fully consistent... please be carefull") - else: + else: logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.') - os.remove(path_to_remove) - self.cards.remove(card_to_remove) - else: - logger.info('Remember that Parton-Shower are not yet ready for such proton component definition (HW implementation in progress).', '$MG:BOLD' ) - elif (abs(self.run_card['lpp1'])==3 and abs(self.run_card['lpp2'])==3): - if 'pythia8_card.dat' in self.cards: - if self.run_card['pdlabel'] == 'isronlyll': - if 'partonshower' not in self.run_card['bypass_check']: - # force that QED shower is on? - for param in ['TimeShower:QEDshowerByQ', 'TimeShower:QEDshowerByL', 'TimeShower:QEDshowerByGamma', 'SpaceShower:QEDshowerByQ', 'SpaceShower:QEDshowerByL']: - if param not in self.PY8Card or \ - (not self.PY8Card[param] and param.lower() not in self.PY8Card.user_set): - logger.warning('Activating QED shower: setting %s to True', param) - self.PY8Card[param] = True - elif 'partonshower' in self.run_card['bypass_check']: - logger.warning("forcing to keep parton-shower run while possibly not fully consistent... please be carefull") - else: - logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.') - os.remove(self.paths['pythia8']) - self.cards.remove('pythia8_card.dat') + os.remove(self.paths['pythia8']) + self.cards.remove('pythia8_card.dat') ######################################################################## @@ -6514,7 +6614,8 @@ def check_card_consistency(self): #check relation between lepton PDF // dressed lepton collisions // ... 
-        if abs(self.run_card['lpp1']) != 1 or abs(self.run_card['lpp2']) != 1:
+        if isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int) and \
+           abs(self.run_card['lpp1']) != 1 or abs(self.run_card['lpp2']) != 1:
             if abs(self.run_card['lpp1']) == abs(self.run_card['lpp2']) == 3:
                 # this can be dressed lepton or photon-flux
                 if proc_charac['pdg_initial1'] in [[11],[-11]] and proc_charac['pdg_initial2'] in [[11],[-11]]:
@@ -6732,7 +6833,11 @@ def write_card_param(self):
         """ write the param_card """
 
         self.param_card.write(self.paths['param'])
-
+
+    def write_card_fo_card(self):
+        """ write the fo_card"""
+        self.fo_card.write_card_from_template(self.paths['FO_analyse'], self.paths['FO_analyse_default'])
+
     @staticmethod
     def update_dependent(mecmd, me_dir, param_card, path ,timer=0,
                          run_card=None, lhapdfconfig=None):
@@ -7076,7 +7181,7 @@ def do_decay(self, line):
         #first find the particle
         particle = line.split('>')[0].strip()
         logger.info("change madspin_card to define the decay of %s: %s" %(particle, line.strip()), '$MG:BOLD')
-        particle = particle.replace('+','\+').replace('-','\-')
+        particle = particle.replace('+',r'\+').replace('-',r'\-')
         decay_pattern = re.compile(r"^\s*decay\s+%s\s*>[\s\w+-~]*?$" % particle, re.I+re.M)
         text= open(path).read()
         text = decay_pattern.sub('', text)
@@ -7193,7 +7298,7 @@ def help_edit(self, prefix=True):
         logger.info( '  --clean remove all previously existing line in the file')
         logger.info( '  --comment_line="" comment all lines matching the regular expression')
         logger.info('')
-        logger.info(' Note: all regular-expression will be prefixed by ^\s*')
+        logger.info(r' Note: all regular-expression will be prefixed by ^\s*')
         logger.info('')
         logger.info( ' example: edit reweight --after_line="change mode\b" change model heft')
         logger.info( '          edit madspin --after_line="banner" change model XXXX')
@@ -7314,7 +7419,7 @@ def do_add(self, line):
             text = open(path).read()
             split = text.split('\n')
             search_pattern=r'''replace_line=(?P<quote>["'])(?:(?=(\\?))\2.)*?\1'''
-            pattern = '^\s*' + re.search(search_pattern, line).group()[14:-1]
+            pattern = r'^\s*' + re.search(search_pattern, line).group()[14:-1]
             for posline,l in enumerate(split):
                 if re.search(pattern, l):
                     break
@@ -7344,7 +7449,7 @@ def do_add(self, line):
             text = open(path).read()
             split = text.split('\n')
             search_pattern=r'''comment_line=(?P<quote>["'])(?:(?=(\\?))\2.)*?\1'''
-            pattern = '^\s*' + re.search(search_pattern, line).group()[14:-1]
+            pattern = r'^\s*' + re.search(search_pattern, line).group()[14:-1]
             nb_mod = 0
             for posline,l in enumerate(split):
                 if re.search(pattern, l):
@@ -7366,7 +7471,7 @@ def do_add(self, line):
             text = open(path).read()
             split = text.split('\n')
             search_pattern=r'''before_line=(?P<quote>["'])(?:(?=(\\?))\2.)*?\1'''
-            pattern = '^\s*' + re.search(search_pattern, line).group()[13:-1]
+            pattern = r'^\s*' + re.search(search_pattern, line).group()[13:-1]
             for posline,l in enumerate(split):
                 if re.search(pattern, l):
                     break
@@ -7383,7 +7488,7 @@ def do_add(self, line):
             text = open(path).read()
             split = text.split('\n')
             search_pattern = r'''after_line=(?P<quote>["'])(?:(?=(\\?))\2.)*?\1'''
-            pattern = '^\s*' + re.search(search_pattern, line).group()[12:-1]
+            pattern = r'^\s*' + re.search(search_pattern, line).group()[12:-1]
             for posline,l in enumerate(split):
                 if re.search(pattern, l):
                     break
@@ -7527,16 +7632,19 @@ def open_file(self, answer):
                 answer = 'plot'
             else:
                 answer = self.cards[int(answer)-self.integer_bias]
-
+        path = ''
         if 'madweight' in answer:
             answer = answer.replace('madweight', 'MadWeight')
         elif 
'MadLoopParams' in answer: answer = self.paths['ML'] elif 'pythia8_card' in answer: answer = self.paths['pythia8'] + elif 'FO_analyse' in answer: + path = self.paths['FO_analyse'] + answer = 'fo_card' if os.path.exists(answer): path = answer - else: + elif not os.path.exists(path): if not '.dat' in answer and not '.lhco' in answer: if answer != 'trigger': path = self.paths[answer] @@ -7595,7 +7703,8 @@ def reload_card(self, path): logger.error('Please re-open the file and fix the problem.') logger.warning('using the \'set\' command without opening the file will discard all your manual change') elif path == self.paths['run']: - self.run_card = banner_mod.RunCard(path) + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + self.run_card = banner_mod.RunCard(path) elif path == self.paths['shower']: self.shower_card = shower_card_mod.ShowerCard(path) elif path == self.paths['ML']: @@ -7614,6 +7723,8 @@ def reload_card(self, path): except: import internal.madweight.Cards as mwcards self.mw_card = mwcards.Card(path) + elif path == self.paths['FO_analyse']: + self.fo_card = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse']) else: logger.debug('not keep in sync: %s', path) return path @@ -7629,6 +7740,9 @@ def scanparamcardhandling(input_path=lambda obj: pjoin(obj.me_dir, 'Cards', 'par iteratorclass=param_card_mod.ParamCardIterator, summaryorder=lambda obj: lambda:None, check_card=lambda obj: CommonRunCmd.static_check_param_card, + run_card_scan=False, + run_card_input= lambda obj: pjoin(obj.me_dir, 'Cards', 'run_card.dat'), + run_card_iteratorclass=banner_mod.RunCardIterator, ): """ This is a decorator for customizing/using scan over the param_card (or technically other) This should be use like this: @@ -7678,7 +7792,60 @@ def __enter__(self): def __exit__(self, ctype, value, traceback ): self.iterator.write(self.path) - def decorator(original_fct): + def scan_over_run_card(original_fct, obj, *args, **opts): + + if isinstance(input_path, str): + card_path = run_card_input + else: + card_path = run_card_input(obj) + + run_card_iterator = run_card_iteratorclass(card_path) + orig_card = copy.deepcopy(run_card_iterator.run_card) + if not run_card_iterator.run_card.scan_set: + return original_fct(obj, *args, **opts) + + + with restore_iterator(orig_card, card_path): + # this with statement ensure that the original card is restore + # whatever happens inside those block + + if not hasattr(obj, 'allow_notification_center'): + obj.allow_notification_center = False + with misc.TMP_variable(obj, 'allow_notification_center', False): + orig_name = get_run_name(obj) + if not orig_name and args[1]: + orig_name = args[1][0] + args = (args[0], args[1][1:]) + #orig_name = "scan_%s" % len(obj.results) + + try: + os.mkdir(pjoin(obj.me_dir, 'Events', orig_name)) + except Exception: + pass + next_name = orig_name + "_00" + + for i,card in enumerate(run_card_iterator): + card.write(card_path) + # still have to check for the auto-wdith + #if i !=0: + next_name = run_card_iterator.get_next_name(next_name) + set_run_name(obj)(next_name) + try: + original_fct(obj, *args, **opts) + except ignoreerror as error: + run_card_iterator.store_entry(next_name, {'exception': error}) + else: + run_card_iterator.store_entry(next_name, store_for_scan(obj)(), run_card_path=card_path) + + #param_card_iterator.write(card_path) #-> this is done by the with statement + name = misc.get_scan_name(orig_name, next_name) + path = result_path(obj) % name + logger.info("write scan results in %s" % path ,'$MG:BOLD') + order = 
summaryorder(obj)() + run_card_iterator.write_summary(path, order=order) + + + def decorator(original_fct): def new_fct(obj, *args, **opts): if isinstance(input_path, str): @@ -7702,8 +7869,13 @@ def new_fct(obj, *args, **opts): if not param_card_iterator: #first run of the function - original_fct(obj, *args, **opts) - return + if run_card_scan: + scan_over_run_card(original_fct, obj, *args, **opts) + return + else: + #first run of the function + original_fct(obj, *args, **opts) + return with restore_iterator(param_card_iterator, card_path): # this with statement ensure that the original card is restore diff --git a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/extended_cmd.py b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/extended_cmd.py index 2f37070580..789976beee 100755 --- a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/extended_cmd.py +++ b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/extended_cmd.py @@ -624,12 +624,12 @@ def complete(self, text, state): compfunc = self.completenames # correct wrong splittion with '\ ' - if line and begidx > 2 and line[begidx-2:begidx] == '\ ': + if line and begidx > 2 and line[begidx-2:begidx] == r'\ ': Ntext = line.split(os.path.sep)[-1] - self.completion_prefix = Ntext.rsplit('\ ', 1)[0] + '\ ' + self.completion_prefix = Ntext.rsplit(r'\ ', 1)[0] + r'\ ' to_rm = len(self.completion_prefix) - 1 Nbegidx = len(line.rsplit(os.path.sep, 1)[0]) + 1 - data = compfunc(Ntext.replace('\ ', ' '), line, Nbegidx, endidx) + data = compfunc(Ntext.replace(r'\ ', ' '), line, Nbegidx, endidx) self.completion_matches = [p[to_rm:] for p in data if len(p)>to_rm] # correct wrong splitting with '-'/"=" @@ -742,7 +742,7 @@ def path_completion(text, base_dir = None, only_dirs = False, completion += [prefix + f for f in ['.'+os.path.sep, '..'+os.path.sep] if \ f.startswith(text) and not prefix.startswith('.')] - completion = [a.replace(' ','\ ') for a in completion] + completion = [a.replace(' ',r'\ ') for a in completion] return completion @@ -1253,7 +1253,7 @@ def check_answer_in_input_file(self, question_instance, default, path=False, lin return possibility[0] if '=' in line and ' ' in line.strip(): leninit = len(line) - line,n = re.subn('\s*=\s*','=', line) + line,n = re.subn(r'\s*=\s*','=', line) if n and len(line) != leninit: return self.check_answer_in_input_file(question_instance, default, path=path, line=line) @@ -1311,7 +1311,7 @@ def nice_error_handling(self, error, line): if os.path.exists(self.debug_output): os.remove(self.debug_output) try: - super(Cmd,self).onecmd('history %s' % self.debug_output.replace(' ', '\ ')) + super(Cmd,self).onecmd('history %s' % self.debug_output.replace(' ', r'\ ')) except Exception as error: logger.error(error) @@ -2001,6 +2001,7 @@ def write_configuration(self, filepath, basefile, basedir, to_keep): text = "" has_mg5_path = False # Use local configuration => Need to update the path + already_written = set() for line in open(basefile): if '=' in line: data, value = line.split('=',1) @@ -2018,9 +2019,12 @@ def write_configuration(self, filepath, basefile, basedir, to_keep): comment = '' if key in to_keep: value = str(to_keep[key]) - else: + elif line not in already_written: + already_written.add(line) text += line continue + else: + continue if key == 'mg5_path': has_mg5_path = True try: @@ -2032,14 +2036,20 @@ def write_configuration(self, filepath, basefile, basedir, to_keep): # check if absolute path if not os.path.isabs(value): value = os.path.realpath(os.path.join(basedir, value)) - text += '%s = %s # %s \n' % (key, value, comment) + 
new_line = '%s = %s # %s \n' % (key, value, comment) + if new_line not in already_written: + text += new_line + already_written.add(new_line) for key in to_write: if key in to_keep: - text += '%s = %s \n' % (key, to_keep[key]) + new_line = '%s = %s \n' % (key, to_keep[key]) + if new_line not in already_written: + text += new_line if not MADEVENT and not has_mg5_path: - text += """\n# MG5 MAIN DIRECTORY\n""" - text += "mg5_path = %s\n" % MG5DIR + if "mg5_path = %s\n" % MG5DIR not in already_written: + text += """\n# MG5 MAIN DIRECTORY\n""" + text += "mg5_path = %s\n" % MG5DIR writer = open(filepath,'w') writer.write(text) @@ -2190,7 +2200,7 @@ def onecmd(self, line, **opt): raise def reask(self, reprint_opt=True): - pat = re.compile('\[(\d*)s to answer\]') + pat = re.compile(r'\[(\d*)s to answer\]') prev_timer = signal.alarm(0) # avoid timer if any if prev_timer: @@ -2991,7 +3001,7 @@ def question_formatting(self, nb_col = 80, lpotential_switch=0, lnb_key=0, key=None): - """should return four lines: + r"""should return four lines: 1. The upper band (typically /========\ 2. The lower band (typically \========/ 3. The line without conflict | %(nb)2d. %(descrip)-20s %(name)5s = %(switch)-10s | @@ -3239,13 +3249,13 @@ def create_question(self, help_text=True): data_to_format['conflict_switch'] = self.color_for_value(key,self.inconsistent_keys[key], consistency=False) if hidden_line: - f2 = re.sub('%(\((?:name|descrip|add_info)\)-?)(\d+)s', + f2 = re.sub(r'%(\((?:name|descrip|add_info)\)-?)(\d+)s', lambda x: '%%%s%ds' % (x.group(1),int(x.group(2))+9), f2) text.append(f2 % data_to_format) elif hidden_line: if not f3: - f3 = re.sub('%(\((?:name|descrip|add_info)\)-?)(\d+)s', + f3 = re.sub(r'%(\((?:name|descrip|add_info)\)-?)(\d+)s', lambda x: '%%%s%ds' % (x.group(1),int(x.group(2))+9), f1) text.append(f3 % data_to_format) diff --git a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/file_writers.py b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/file_writers.py index 41bff05276..526756129f 100755 --- a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/file_writers.py +++ b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/file_writers.py @@ -36,10 +36,10 @@ class FileWriter(io.FileIO): supported_preprocessor_commands = ['if'] preprocessor_command_re=re.compile( - "\s*(?P%s)\s*\(\s*(?P.*)\s*\)\s*{\s*"\ + r"\s*(?P%s)\s*\(\s*(?P.*)\s*\)\s*{\s*"\ %('|'.join(supported_preprocessor_commands))) preprocessor_endif_re=re.compile(\ - "\s*}\s*(?Pelse)?\s*(\((?P.*)\))?\s*(?P{)?\s*") + r"\s*}\s*(?Pelse)?\s*(\((?P.*)\))?\s*(?P{)?\s*") class FileWriterError(IOError): """Exception raised if an error occurs in the definition @@ -191,15 +191,15 @@ class FortranWriterError(FileWriter.FileWriterError): pass # Parameters defining the output of the Fortran writer - keyword_pairs = {'^if.+then\s*$': ('^endif', 2), - '^type(?!\s*\()\s*.+\s*$': ('^endtype', 2), - '^do(?!\s+\d+)\s+': ('^enddo\s*$', 2), - '^subroutine': ('^end\s*$', 0), - '^module': ('^end\s*$', 0), - 'function': ('^end\s*$', 0)} - single_indents = {'^else\s*$':-2, - '^else\s*if.+then\s*$':-2} - number_re = re.compile('^(?P\d+)\s+(?P.*)') + keyword_pairs = {r'^if.+then\s*$': ('^endif', 2), + r'^type(?!\s*\()\s*.+\s*$': ('^endtype', 2), + r'^do(?!\s+\d+)\s+': (r'^enddo\s*$', 2), + '^subroutine': (r'^end\s*$', 0), + '^module': (r'^end\s*$', 0), + 'function': (r'^end\s*$', 0)} + single_indents = {r'^else\s*$':-2, + r'^else\s*if.+then\s*$':-2} + number_re = re.compile(r'^(?P\d+)\s+(?P.*)') line_cont_char = '$' comment_char = 'c' uniformcase = True #force everyting to be lower/upper 
case @@ -212,7 +212,7 @@ class FortranWriterError(FileWriter.FileWriterError): # Private variables __indent = 0 __keyword_list = [] - __comment_pattern = re.compile(r"^(\s*#|c$|(c\s+([^=]|$))|cf2py|c\-\-|c\*\*|!)", re.IGNORECASE) + __comment_pattern = re.compile(r"^(\s*#|c\$|c$|(c\s+([^=]|$))|cf2py|c\-\-|c\*\*|\s*!|!\$)", re.IGNORECASE) __continuation_line = re.compile(r"(?: )[$&]") def write_line(self, line): @@ -424,26 +424,20 @@ def count_number_of_quotes(self, line): i = i + 1 return len(splitline)-1 - def remove_routine(self, text, fct_names, formatting=True): - """write the incoming text but fully removing the associate routine/function - text can be a path to a file, an iterator, a string - fct_names should be a list of functions to remove + @staticmethod + def get_routine(text, fct_names, call_back=None): + """ + get the fortran function from a fortran file """ - f77_type = ['real*8', 'integer', 'double precision', 'logical'] - pattern = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ + pattern = re.compile(r'^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ % {'type':'|'.join(f77_type)}, re.I) - + + if isinstance(text, str): + text = text.split('\n') + + to_write=False removed = [] - if isinstance(text, str): - if '\n' in text: - text = text.split('\n') - else: - text = open(text) - if isinstance(fct_names, str): - fct_names = [fct_names] - - to_write=True for line in text: fct = pattern.findall(line) if fct: @@ -451,22 +445,38 @@ def remove_routine(self, text, fct_names, formatting=True): to_write = False else: to_write = True - if to_write: - if formatting: - if line.endswith('\n'): - line = line[:-1] - self.writelines(line) - else: - if not line.endswith('\n'): - line = '%s\n' % line - super(FileWriter,self).writelines(line) + if call_back: + call_back(line) else: removed.append(line) - + return removed + + def remove_routine(self, text, fct_names, formatting=True): + """write the incoming text but fully removing the associate routine/function + text can be a path to a file, an iterator, a string + fct_names should be a list of functions to remove + """ + + def call_back(line): + if formatting: + if line.endswith('\n'): + line = line[:-1] + self.writelines(line) + else: + if not line.endswith('\n'): + line = '%s\n' % line + super(FileWriter,self).writelines(line) + + return self.get_routine(text, fct_names, call_back) + +class FortranWriter90(FortranWriter): + + comment_char = ' !' 
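The remove_routine/get_routine split in the file_writers.py hunk above factors the routine-scanning loop into a static helper that reports every line outside the named routines through an optional call_back, so writing-while-filtering becomes a thin wrapper around it. A minimal sketch of the pattern, assuming simplified stand-in names (F77_TYPES, ROUTINE_RE) rather than the real FortranWriter class:

    import re

    # Stand-in for the SUBROUTINE / typed-FUNCTION header match used above;
    # F77_TYPES mirrors the f77_type list the real method relies on.
    F77_TYPES = ['real*8', 'integer', 'double precision', 'logical']
    ROUTINE_RE = re.compile(r'^\s+(?:SUBROUTINE|(?:%s)\s+function)\s+([a-zA-Z]\w*)'
                            % '|'.join(F77_TYPES), re.I)

    def get_routine(text, fct_names, call_back=None):
        """Collect the lines of the named routines; hand all others to call_back."""
        if isinstance(text, str):
            text = text.split('\n')
        if isinstance(fct_names, str):
            fct_names = [fct_names]
        to_write = False
        removed = []
        for line in text:
            fct = ROUTINE_RE.findall(line)
            if fct:
                to_write = fct[0] not in fct_names  # entering a routine to keep?
            if to_write:
                if call_back:
                    call_back(line)
            else:
                removed.append(line)
        return removed

    # remove_routine then reduces to get_routine plus a call_back that writes:
    kept = []
    source = '      SUBROUTINE FOO\n      END\n      SUBROUTINE BAR\n      END'
    assert get_routine(source, 'FOO', call_back=kept.append) == [
        '      SUBROUTINE FOO', '      END']
    assert kept == ['      SUBROUTINE BAR', '      END']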
+ #=============================================================================== # CPPWriter #=============================================================================== @@ -497,50 +507,50 @@ class CPPWriterError(FileWriter.FileWriterError): '^private': standard_indent, '^protected': standard_indent} - spacing_patterns = [ - ('\s*\"\s*}', '\"'), - ('\s*,\s*', ', '), - ('\s*-\s*', ' - '), - ('([{(,=])\s*-\s*', '\g<1> -'), - ('(return)\s*-\s*', '\g<1> -'), - ('\s*\+\s*', ' + '), - ('([{(,=])\s*\+\s*', '\g<1> +'), - ('\(\s*', '('), - ('\s*\)', ')'), - ('\{\s*', '{'), - ('\s*\}', '}'), - ('\s*=\s*', ' = '), - ('\s*>\s*', ' > '), - ('\s*<\s*', ' < '), - ('\s*!\s*', ' !'), - ('\s*/\s*', '/'), - ('(?\s+>\s*', ' >> '), - ('<\s*double\s*>>\s*', ' > '), - ('\s*<\s+<\s*', ' << '), - ('\s*-\s+>\s*', '->'), - ('\s*=\s+=\s*', ' == '), - ('\s*!\s+=\s*', ' != '), - ('\s*>\s+=\s*', ' >= '), - ('\s*<\s+=\s*', ' <= '), - ('\s*&&\s*', ' && '), - ('\s*\|\|\s*', ' || '), - ('\s*{\s*}', ' {}'), - ('\s*;\s*', '; '), - (';\s*\}', ';}'), - (';\s*$}', ';'), - ('\s*<\s*([a-zA-Z0-9]+?)\s*>', '<\g<1>>'), - ('^#include\s*<\s*(.*?)\s*>', '#include <\g<1>>'), - ('(\d+\.{0,1}\d*|\.\d+)\s*[eE]\s*([+-]{0,1})\s*(\d+)', - '\g<1>e\g<2>\g<3>'), - ('\s+',' '), - ('^\s*#','#')] + spacing_patterns = [(r'\s*\"\s*}', '\"'), + (r'\s*,\s*', ', '), + (r'\s*-\s*', ' - '), + (r'([{(,=])\s*-\s*', r'\g<1> -'), + (r'(return)\s*-\s*', r'\g<1> -'), + (r'\s*\+\s*', ' + '), + (r'([{(,=])\s*\+\s*', r'\g<1> +'), + (r'\(\s*', '('), + (r'\s*\)', ')'), + (r'\{\s*', '{'), + (r'\s*\}', '}'), + (r'\s*=\s*', ' = '), + (r'\s*>\s*', ' > '), + (r'\s*<\s*', ' < '), + (r'\s*!\s*', ' !'), + (r'\s*/\s*', '/'), + (r'\s*\*\s*', ' * '), + (r'\s*-\s+-\s*', '-- '), + (r'\s*\+\s+\+\s*', '++ '), + (r'\s*-\s+=\s*', ' -= '), + (r'\s*\+\s+=\s*', ' += '), + (r'\s*\*\s+=\s*', ' *= '), + (r'\s*/=\s*', ' /= '), + (r'\s*>\s+>\s*', ' >> '), + (r'<\s*double\s*>>\s*', ' > '), + (r'\s*<\s+<\s*', ' << '), + (r'\s*-\s+>\s*', '->'), + (r'\s*=\s+=\s*', ' == '), + (r'\s*!\s+=\s*', ' != '), + (r'\s*>\s+=\s*', ' >= '), + (r'\s*<\s+=\s*', ' <= '), + (r'\s*&&\s*', ' && '), + (r'\s*\|\|\s*', ' || '), + (r'\s*{\s*}', ' {}'), + (r'\s*;\s*', '; '), + (r';\s*\}', ';}'), + (r';\s*$}', ';'), + (r'\s*<\s*([a-zA-Z0-9]+?)\s*>', r'<\g<1>>'), + (r'^#include\s*<\s*(.*?)\s*>', r'#include <\g<1>>'), + (r'(\d+\.{0,1}\d*|\.\d+)\s*[eE]\s*([+-]{0,1})\s*(\d+)', + r'\g<1>e\g<2>\g<3>'), + (r'\s+',' '), + (r'^\s*#','#')] + spacing_re = dict([(key[0], re.compile(key[0])) for key in \ spacing_patterns]) diff --git a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/gen_crossxhtml.py b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/gen_crossxhtml.py index d58ec573bc..681bf9d09b 100755 --- a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/gen_crossxhtml.py +++ b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/gen_crossxhtml.py @@ -648,7 +648,7 @@ def recreate(self, banner): if run_card['ickkw'] != 0: #parse the file to have back the information pythia_log = misc.BackRead(pjoin(path, '%s_pythia.log' % tag)) - pythiare = re.compile("\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") + pythiare = re.compile(r"\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") for line in pythia_log: info = pythiare.search(line) if not info: @@ -1623,7 +1623,7 @@ def get_html(self, runresults): elif self.debug: text = str(self.debug).replace('. ','.
<br>') if 'http' in text: - pat = re.compile('(http[\S]*)') + pat = re.compile(r'(http[\S]*)') text = pat.sub(r'<a href="\1"> here </a>', text) debug = '<br><br><font color=red>%s<BR>%s</font>
' % \ (self.debug.__class__.__name__, text) diff --git a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/gen_ximprove.py b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/gen_ximprove.py index c86da36a05..415ecc9de0 100755 --- a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/gen_ximprove.py +++ b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/gen_ximprove.py @@ -158,8 +158,11 @@ def get_helicity(self, to_submit=True, clean=True): (stdout, _) = p.communicate(''.encode()) stdout = stdout.decode('ascii',errors='ignore') if stdout: - nb_channel = max([math.floor(float(d)) for d in stdout.split()]) + lines = stdout.strip().split('\n') + nb_channel = max([math.floor(float(d)) for d in lines[-1].split()]) else: + if os.path.exists(pjoin(self.me_dir, 'error')): + os.remove(pjoin(self.me_dir, 'error')) for matrix_file in misc.glob('matrix*orig.f', Pdir): files.cp(matrix_file, matrix_file.replace('orig','optim')) P_zero_result.append(Pdir) @@ -297,12 +300,14 @@ def get_helicity(self, to_submit=True, clean=True): bad_amps_perhel = [] if __debug__: mtext = open(matrix_file).read() - nb_amp = int(re.findall('PARAMETER \(NGRAPHS=(\d+)\)', mtext)[0]) - logger.debug('nb_hel: %s zero amp: %s bad_amps_hel: %s/%s', len(good_hels),len(bad_amps),len(bad_amps_perhel), len(good_hels)*nb_amp ) + nb_amp = int(re.findall(r'PARAMETER \(NGRAPHS=(\d+)\)', mtext)[0]) + logger.debug('(%s) nb_hel: %s zero amp: %s bad_amps_hel: %s/%s', split_file[-1], len(good_hels),len(bad_amps),len(bad_amps_perhel), len(good_hels)*nb_amp ) if len(good_hels) == 1: files.cp(matrix_file, matrix_file.replace('orig','optim')) continue # avoid optimization if onlye one helicity - recycler = hel_recycle.HelicityRecycler(good_hels, bad_amps, bad_amps_perhel) + + gauge = self.cmd.proc_characteristics['gauge'] + recycler = hel_recycle.HelicityRecycler(good_hels, bad_amps, bad_amps_perhel, gauge=gauge) # In case of bugs you can play around with these: recycler.hel_filt = self.run_card['hel_filtering'] recycler.amp_splt = self.run_card['hel_splitamp'] @@ -1299,7 +1304,7 @@ def get_job_for_event(self): 'script_name': 'unknown', 'directory': C.name, # need to be change for splitted job 'P_dir': C.parent_name, - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # needed for RO gridpack 'offset': 1, # need to be change for splitted job 'nevents': nevents, 'maxiter': self.max_iter, @@ -1387,7 +1392,7 @@ def create_ajob(self, template, jobs, write_dir=None): break info = jobs[j] info['script_name'] = 'ajob%i' % script_number - info['keeplog'] = 'false' + info['keeplog'] = 'false' if self.run_card['keep_log'] != 'debug' else 'true' if "base_directory" not in info: info["base_directory"] = "./" fsock.write(template_text % info) @@ -1456,7 +1461,7 @@ def get_job_for_precision(self): 'script_name': 'unknown', 'directory': C.name, # need to be change for splitted job 'P_dir': C.parent_name, - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # used for RO gridpack 'offset': 1, # need to be change for splitted job 'nevents': nevents, 'maxiter': self.max_iter, @@ -1916,7 +1921,7 @@ def get_job_for_event(self): 'directory': C.name, # need to be change for splitted job 'P_dir': os.path.basename(C.parent_name), 'offset': 1, # need to be change for splitted job - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # use for RO gridpack 'nevents': 
nevents, #int(nevents*self.gen_events_security)+1, 'maxiter': self.max_iter, 'miniter': self.min_iter, diff --git a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/hel_recycle.py b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/hel_recycle.py index dfa45d5d20..6c86611f68 100755 --- a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/hel_recycle.py +++ b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/hel_recycle.py @@ -383,7 +383,7 @@ def get_number(cls, *args): class HelicityRecycler(): '''Class for recycling helicity''' - def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[]): + def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[], gauge='U'): External.good_hel = [] External.nhel_lines = '' @@ -427,6 +427,7 @@ def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[]): self.all_hel = [] self.hel_filt = True + self.gauge = gauge def set_input(self, file): if 'born_matrix' in file: @@ -612,7 +613,7 @@ def unfold_helicities(self, line, nature): def apply_amps(self, line, new_objs): if self.amp_splt: - return split_amps(line, new_objs) + return split_amps(line, new_objs, gauge=self.gauge) else: return apply_args(line, [i.args for i in new_objs]) @@ -785,7 +786,7 @@ def apply_args(old_line, all_the_args): return ''.join(new_lines) -def split_amps(line, new_amps): +def split_amps(line, new_amps, gauge): if not new_amps: return '' fct = line.split('(',1)[0].split('_0')[0] @@ -841,34 +842,31 @@ def split_amps(line, new_amps): spin = fct.split(None,1)[1][to_remove] lines.append('%sP1N_%s(%s)' % (fct, to_remove+1, ', '.join(args))) - hel, iamp = re.findall('AMP\((\d+),(\d+)\)', amp_result)[0] + hel, iamp = re.findall(r'AMP\((\d+),(\d+)\)', amp_result)[0] hel_calculated.append(hel) #lines.append(' %(result)s = TMP(3) * W(3,%(w)s) + TMP(4) * W(4,%(w)s)+' # % {'result': amp_result, 'w': windex}) #lines.append(' & TMP(5) * W(5,%(w)s)+TMP(6) * W(6,%(w)s)' # % {'result': amp_result, 'w': windex}) - if spin in "VF": - lines.append(""" call CombineAmp(%(nb)i, - & (/%(hel_list)s/), - & (/%(w_list)s/), - & TMP, W, AMP(1,%(iamp)s))""" % - {'nb': len(sub_amps), - 'hel_list': ','.join(hel_calculated), - 'w_list': ','.join(windices), - 'iamp': iamp - }) + if spin == "F" or ( spin == "V" and gauge !='FD'): + suffix = '' elif spin == "S": - lines.append(""" call CombineAmpS(%(nb)i, - &(/%(hel_list)s/), - & (/%(w_list)s/), - & TMP, W, AMP(1,%(iamp)s))""" % - {'nb': len(sub_amps), - 'hel_list': ','.join(hel_calculated), - 'w_list': ','.join(windices), - 'iamp': iamp - }) + suffix = 'S' + elif spin == "V" and gauge == "FD": + suffix = "FD" else: - raise Exception("split amp are not supported for spin2 and 3/2") + raise Exception("split amp not supported for spin2, 3/2") + + lines.append(""" call CombineAmp%(suffix)s(%(nb)i, + & (/%(hel_list)s/), + & (/%(w_list)s/), + & TMP, W, AMP(1,%(iamp)s))""" % {'suffix':suffix, + 'nb': len(sub_amps), + 'hel_list': ','.join(hel_calculated), + 'w_list': ','.join(windices), + 'iamp': iamp + }) + #lines.append('') return '\n'.join(lines) diff --git a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/histograms.py b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/histograms.py index 98f22c4c3a..9931127f66 100755 --- a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/histograms.py +++ b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/histograms.py @@ -632,34 +632,34 @@ class HwU(Histogram): # than necessary because the HwU standard allows for spaces from within # the name of a weight weight_header_re = re.compile( - '&\s*(?P(\S|(\s(?!\s*(&|$))))+)(\s(?!(&|$)))*') + 
r'&\s*(?P(\S|(\s(?!\s*(&|$))))+)(\s(?!(&|$)))*') # ================================ # Histo weight specification RE's # ================================ # The start of a plot - histo_start_re = re.compile('^\s*\s*(?P\d+)\s*"\s*'+ - '(?P(\S|(\s(?!\s*")))+)\s*"\s*$') + histo_start_re = re.compile(r'^\s*\s*(?P\d+)\s*"\s*'+ + r'(?P(\S|(\s(?!\s*")))+)\s*"\s*$') # A given weight specifier - a_float_re = '[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' - histo_bin_weight_re = re.compile('(?P%s|NaN)'%a_float_re,re.IGNORECASE) - a_int_re = '[\+|-]?\d+' + a_float_re = r'[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' + histo_bin_weight_re = re.compile(r'(?P%s|NaN)'%a_float_re,re.IGNORECASE) + a_int_re = r'[\+|-]?\d+' # The end of a plot histo_end_re = re.compile(r'^\s*<\\histogram>\s*$') # A scale type of weight - weight_label_scale = re.compile('^\s*mur\s*=\s*(?P%s)'%a_float_re+\ - '\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) - weight_label_PDF = re.compile('^\s*PDF\s*=\s*(?P\d+)\s*$') - weight_label_PDF_XML = re.compile('^\s*pdfset\s*=\s*(?P\d+)\s*$') - weight_label_TMS = re.compile('^\s*TMS\s*=\s*(?P%s)\s*$'%a_float_re) - weight_label_alpsfact = re.compile('^\s*alpsfact\s*=\s*(?P%s)\s*$'%a_float_re, + weight_label_scale = re.compile(r'^\s*mur\s*=\s*(?P%s)'%a_float_re+\ + r'\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) + weight_label_PDF = re.compile(r'^\s*PDF\s*=\s*(?P\d+)\s*$') + weight_label_PDF_XML = re.compile(r'^\s*pdfset\s*=\s*(?P\d+)\s*$') + weight_label_TMS = re.compile(r'^\s*TMS\s*=\s*(?P%s)\s*$'%a_float_re) + weight_label_alpsfact = re.compile(r'^\s*alpsfact\s*=\s*(?P%s)\s*$'%a_float_re, re.IGNORECASE) - weight_label_scale_adv = re.compile('^\s*dyn\s*=\s*(?P%s)'%a_int_re+\ - '\s*mur\s*=\s*(?P%s)'%a_float_re+\ - '\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) - weight_label_PDF_adv = re.compile('^\s*PDF\s*=\s*(?P\d+)\s+(?P\S+)\s*$') + weight_label_scale_adv = re.compile(r'^\s*dyn\s*=\s*(?P%s)'%a_int_re+\ + r'\s*mur\s*=\s*(?P%s)'%a_float_re+\ + r'\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) + weight_label_PDF_adv = re.compile(r'^\s*PDF\s*=\s*(?P\d+)\s+(?P\S+)\s*$') class ParseError(MadGraph5Error): @@ -926,7 +926,7 @@ def get_HwU_source(self, print_header=True): res.append(' '.join('%+16.7e'%wgt for wgt in list(bin.boundaries))) res[-1] += ' '.join('%+16.7e'%bin.wgts[key] for key in self.bins.weight_labels if key not in ['central','stat_error']) - res.append('<\histogram>') + res.append(r'<\histogram>') return res def output(self, path=None, format='HwU', print_header=True): @@ -1149,6 +1149,8 @@ def parse_one_histo_from_stream(self, stream, all_weight_header, boundaries = [0.0,0.0] for j, weight in \ enumerate(HwU.histo_bin_weight_re.finditer(line_bin)): + if (j == len(weight_header)): + continue if j == len(all_weight_header): raise HwU.ParseError("There is more bin weights"+\ " specified than expected (%i)"%len(weight_header)) @@ -1803,7 +1805,7 @@ def parse_histos_from_PY8_XML_stream(self, stream, run_id=None, # Filter empty weights coming from the split weight_label_list = [wgt.strip() for wgt in str(selected_run_node.getAttribute('header')).split(';') if - not re.match('^\s*$',wgt)] + not re.match(r'^\s*$',wgt)] ordered_weight_label_list = [w for w in weight_label_list if w not\ in ['xmin','xmax']] # Remove potential repetition of identical weight labels @@ -1827,7 +1829,7 @@ def parse_histos_from_PY8_XML_stream(self, stream, run_id=None, all_weights = [] for wgt_position, wgt_label in \ enumerate(str(selected_run_node.getAttribute('header')).split(';')): - if not 
re.match('^\s*$',wgt_label) is None: + if not re.match(r'^\s*$',wgt_label) is None: continue all_weights.append({'POSITION':wgt_position}) for wgt_item in wgt_label.strip().split('_'): @@ -2714,7 +2716,7 @@ def ratio_no_correlations(wgtsA, wgtsB): # First the global gnuplot header for this histogram group global_header =\ -""" +r""" ################################################################################ ### Rendering of the plot titled '%(title)s' ################################################################################ @@ -2862,9 +2864,9 @@ def ratio_no_correlations(wgtsA, wgtsB): major_title = ', '.join(major_title) if not mu[0] in ['none',None]: - major_title += ', dynamical\_scale\_choice=%s'%mu[0] + major_title += r', dynamical\_scale\_choice=%s'%mu[0] if not pdf[0] in ['none',None]: - major_title += ', PDF=%s'%pdf[0].replace('_','\_') + major_title += ', PDF=%s'%pdf[0].replace('_',r'\_') # Do not show uncertainties for individual jet samples (unless first # or specified explicitely and uniquely) @@ -2937,7 +2939,7 @@ def ratio_no_correlations(wgtsA, wgtsB): plot_lines.append( "'%s' index %d using (($1+$2)/2):%d ls %d title '%s'"\ %(HwU_name,block_position+i,mu_var+3,color_index,\ -'%s dynamical\_scale\_choice=%s' % (title,mu[j]))) +r'%s dynamical\_scale\_choice=%s' % (title,mu[j]))) # And now PDF_variation if available if not PDF_var_pos is None: for j,PDF_var in enumerate(PDF_var_pos): @@ -2947,7 +2949,7 @@ def ratio_no_correlations(wgtsA, wgtsB): plot_lines.append( "'%s' index %d using (($1+$2)/2):%d ls %d title '%s'"\ %(HwU_name,block_position+i,PDF_var+3,color_index,\ -'%s PDF=%s' % (title,pdf[j].replace('_','\_')))) +'%s PDF=%s' % (title,pdf[j].replace('_',r'\_')))) # Now add the uncertainty lines, those not using a band so that they # are not covered by those using a band after we reverse plo_lines diff --git a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/launch_plugin.py index 7b10bedcef..9ec09eb71d 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/launch_plugin.py @@ -98,6 +98,7 @@ def default_setup(self): self['vector_size'] = 16 # already setup in default class (just change value) self['aloha_flag'] = '--fast-math' self['matrix_flag'] = '-O3' + self['limhel'] = 0 self.display_block.append('simd') self.display_block.append('psoptim') diff --git a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/lhe_parser.py b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/lhe_parser.py index eee9ba4522..f6e47956cd 100755 --- a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/lhe_parser.py +++ b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/lhe_parser.py @@ -7,6 +7,7 @@ import numbers import math import time +import copy import os import shutil import sys @@ -101,7 +102,7 @@ def __init__(self, line=None, event=None): self.rwgt = 0 return - + self.event = event if event is not None: self.event_id = len(event) #not yet in the event @@ -1066,6 +1067,8 @@ def define_init_banner(self, wgt, lha_strategy, proc_charac=None): #special case for 1>N init_information = run_card.get_banner_init_information() event = next(self) + if not len(event): #if parse-momenta was false we have to parse the first event + event = Event(str(event)) init_information["idbmup1"] = event[0].pdg init_information["ebmup1"] = event[0].mass init_information["idbmup2"] = 0 @@ -1149,6 +1152,7 @@ def initialize_unweighting(self, getwgt, trunc_error): nb_keep = max(20, int(nb_event*trunc_error*15)) 
new_wgt = new_wgt[-nb_keep:] if nb_event == 0: + misc.sprint(i,f) raise Exception # store the information self.initial_nb_events[i] = nb_event @@ -1203,7 +1207,6 @@ def unweight(self, outputpath, get_wgt, **opts): (stop to write event when target is reached) """ - if isinstance(get_wgt, (str,six.text_type)): unwgt_name =get_wgt def get_wgt_multi(event): @@ -1483,12 +1486,12 @@ def reorder_mother_child(self): particle.mother2 -= 1 # re-call the function for the next potential change return self.reorder_mother_child() - - - - - + + + + + def parse_reweight(self): """Parse the re-weight information in order to return a dictionary {key: value}. If no group is define group should be '' """ @@ -1522,6 +1525,37 @@ def parse_nlo_weight(self, real_type=(1,11), threshold=None): threshold=threshold) return self.nloweight + def get_fks_pair(self, real_type=(1,11), threshold=None): + """ Gives the fks pair labels""" + start, stop = self.tag.find(''), self.tag.find('') + if start != -1 != stop: + text = self.tag[start+8:stop] + all_line = text.split('\n') + text = text.lower().replace('d','e') + all_line = text.split('\n') + for line in all_line: + data = line.split() + if len(data)>16: + wgt = OneNLOWeight(line, real_type=real_type) + return wgt.to_merge_pdg,wgt.nexternal + + def get_born_momenta(self,real_type=(1,11), threshold=None): + """ Gets the underlying n+1 body kinematics""" + start, stop = self.tag.find(''), self.tag.find('') + if start != -1 != stop: + text = self.tag[start+8:stop] + text = text.lower().replace('d','e') + all_line = text.split('\n') + for line in all_line: + data = line.split() + if len(data)>16: + wgt = OneNLOWeight(line, real_type=real_type) + nexternal = wgt.nexternal + real_momenta = all_line[2:2+nexternal] + return real_momenta + + + def rewrite_nlo_weight(self, wgt=None): """get the string associate to the weight""" @@ -1558,11 +1592,11 @@ def parse_lo_weight(self): return self.loweight if not hasattr(Event, 'loweight_pattern'): - Event.loweight_pattern = re.compile('''\s*(?P\d+)\s+(?P[\d.e+-]+)\s*\s*\n\s* - \s*(?P[\s\d.+-e]+)\s*\s*\n\s* - 1|2)["']?\>\s*(?P[\s\d.e+-]*)\s*\s*\n\s* - 1|2)["']?\>\s*(?P[\s\d.e+-]*)\s*\s*\n\s* - \s*(?P[\d.e+-]*)\s* + Event.loweight_pattern = re.compile('''\\s*(?P\\d+)\\s+(?P[\\d.e+-]+)\\s*\\s*\n\\s* + \\s*(?P[\\s\\d.+-e]+)\\s*\\s*\n\\s* + 1|2)["']?\\>\\s*(?P[\\s\\d.e+-]*)\\s*\\s*\n\\s* + 1|2)["']?\\>\\s*(?P[\\s\\d.e+-]*)\\s*\\s*\n\\s* + \\s*(?P[\\d.e+-]*)\\s* ''',re.X+re.I+re.M) start, stop = self.tag.find(''), self.tag.find('') @@ -1615,7 +1649,7 @@ def parse_matching_scale(self): self.matched_scale_data = [] - pattern = re.compile("") + pattern = re.compile(r"") data = re.split(pattern,self.tag) if len(data) == 1: return [] @@ -1623,7 +1657,7 @@ def parse_matching_scale(self): tmp = {} start,content, end = data self.tag = "%s%s" % (start, end) - pattern = re.compile("pt_clust_(\d*)=\"([\de+-.]*)\"") + pattern = re.compile("pt_clust_(\\d*)=\"([\\de+-.]*)\"") for id,value in pattern.findall(content): tmp[int(id)] = float(value) for i in range(1, len(self)+1): @@ -1647,7 +1681,7 @@ def parse_syscalc_info(self): return self.syscalc_data pattern = re.compile("|") - pattern2 = re.compile("<(?P[\w]*)(?:\s*(\w*)=[\"'](.*)[\"']\s*|\s*)>(.*)") + pattern2 = re.compile("<(?P[\\w]*)(?:\\s*(\\w*)=[\"'](.*)[\"']\\s*|\\s*)>(.*)") data = re.split(pattern,self.tag) if len(data) == 1: return [] @@ -1850,6 +1884,240 @@ def get_decay(self, pdg_code=0, event_id=None): return new_event + + def set_initial_mass_to_zero(self): + """set the masses of the initial 
particles to zero, by reshuffling the respective momenta + Works only in the **partonic** com frame, so the event must be boosted to such frame + before calling the function + """ + + if not misc.equal(self[0].px, 0) or not misc.equal(self[1].px, 0) or \ + not misc.equal(self[0].py, 0) or not misc.equal(self[1].py, 0) or \ + not misc.equal(self[0].pz, - self[1].pz, zero_limit=False): + misc.sprint(self[0]) + misc.sprint(self[1]) + raise Exception('momenta should be in the partonic center of mass frame') + + self[0].mass = 0. + self[1].mass = 0. + tot_E=0. + for ip,part in enumerate(self): + if part.status == 1 : + tot_E += part.E + if (self[0].pz > 0. and self[1].pz < 0): + self[0].set_momentum(FourMomentum([tot_E/2., 0., 0., tot_E/2.])) + self[1].set_momentum(FourMomentum([tot_E/2., 0., 0., -tot_E/2.])) + elif (self[0].pz < 0. and self[1].pz > 0): + self[0].set_momentum(FourMomentum([tot_E/2., 0., 0., -tot_E/2.])) + self[1].set_momentum(FourMomentum([tot_E/2., 0., 0., tot_E/2.])) + else: + logger.critical('ERROR: two incoming partons not back.-to-back') + + def set_final_jet_mass_to_zero(self): + """set the final light particle masses to zero + """ + + for ip,part in enumerate(self): + if ((abs(part.pid) <= 5) or (abs(part.pid) == 11) or (abs(part.pid) == 12)) and (part.status == 1): + part.mass = 0. + E_1_new = math.sqrt(part.mass**2 + part.px**2 + part.py**2 + part.pz**2) + part.set_momentum(FourMomentum([E_1_new, part.px, part.py, part.pz])) + + + + def merge_particles_kinematics(self, i,j, moth): + """Map to an underlying n-body kinematics for two given + particles i,j to be merged and a resulting moth""" + """ note! kinematics (and id) mapping only! """ + + recoil = True + fks_type = False + + if recoil and not fks_type: + if (i == moth[0].get('number')-1): + fks_i = i + fks_j = j + elif (j == moth[0].get('number')-1): + fks_i = j + fks_j = i + to_remove = fks_j + + merge_i = self[fks_i] + merge_j = self[fks_j] + + i_4mom = FourMomentum(merge_i) + j_4mom = FourMomentum(merge_j) + if (fks_i <= 1): + sign1 = -1.0 + else: + sign1 = 1.0 + mother_4mom = i_4mom + sign1*j_4mom + + new_event = copy.deepcopy(self) + + self[fks_i].pid = moth[0]['id'] + self[fks_i].set_momentum(mother_4mom) + + if fks_i <= 1: # initial-state recoil + new_p = FourMomentum() + for ip,part in enumerate(self): + if (ip != fks_i and ip != fks_j and ip >= 2): + new_p += part + + if fks_i == 0: + self[1].set_momentum(new_p - FourMomentum(self[0])) + elif fks_i == 1: + self[0].set_momentum(new_p - FourMomentum(self[1])) + + pz_1_new = self.recoil_eq(self[0],self[1]) + pz_2_new = self[0].pz + self[1].pz - pz_1_new + E_1_new = math.sqrt(self[0].mass**2 + self[0].px**2 + self[0].py**2 + pz_1_new **2) + E_2_new = math.sqrt(self[1].mass**2 + self[1].px**2 + self[1].py**2 + pz_2_new **2) + self[0].set_momentum(FourMomentum([E_1_new,self[0].px,self[0].py,pz_1_new])) + self[1].set_momentum(FourMomentum([E_2_new,self[1].px,self[1].py,pz_2_new])) + self.pop(to_remove) + + if fks_i > 1: # final-state recoil + + # Re-scale the energy of fks_i to make it on-shell + for ip,part in enumerate(self): + if (ip == fks_i): + part.E = math.sqrt(part.mass**2 + part.px**2 + part.py**2 + part.pz**2) + new_p.E = part.E + + # Find the overall energy in the final state + new_p.E = 0.0 + for ip,part in enumerate(self): + if (ip != fks_j and ip >= 2): + new_p.E += part.E + + # Use one of the initial states to absorb the energy change in the final state + self[1].set_momentum(FourMomentum([new_p.E-self[0].E,self[1].px,self[1].py,self[1].pz])) + + # 
Change the initial state pz and E + pz_1_new = self.recoil_eq(self[0],self[1]) + pz_2_new = self[0].pz + self[1].pz - pz_1_new + E_1_new = math.sqrt(self[0].mass**2 + self[0].px**2 + self[0].py**2 + pz_1_new **2) + E_2_new = math.sqrt(self[1].mass**2 + self[1].px**2 + self[1].py**2 + pz_2_new **2) + self[0].set_momentum(FourMomentum([E_1_new,self[0].px,self[0].py,pz_1_new])) + self[1].set_momentum(FourMomentum([E_2_new,self[1].px,self[1].py,pz_2_new])) + self.pop(to_remove) + + elif fks_type and not recoil: + ## Do it in a more FKS-style + if (i == moth[0].get('number')-1): + fks_i = i + fks_j = j + elif (j == moth[0].get('number')-1): + fks_i = j + fks_j = i + to_remove = fks_j + new_event = copy.copy(event) + + if fks_i <= 1: # initial-state recoil + + # First boost to partonic CM frame + q = FourMomentum(self[0])+FourMomentum(self[1]) + for ip,part in enumerate(self): + vec = FourMomentum(part) + self[ip].set_momentum(vec.zboost(pboost=q)) + + k_tot = FourMomentum([self[0].E+self[1].E-self[fks_j].E,self[0].px+self[1].px-self[fks_j].px,\ + self[0].py+self[1].py-self[fks_j].py,self[0].pz+self[1].pz-self[fks_j].pz]) + + final = FourMomentum([0,0,0,0]) + for ip,part in enumerate(self): + vec = FourMomentum([part.E,part.px,part.py,part.pz]) + if (ip != fks_i and ip != fks_j and ip >= 2): + final = final + vec + + s = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz])**2 + ksi = self[fks_j].E/(math.sqrt(s)/2.0) + y = self[fks_j].pz/self[fks_j].E + + self[0].pz = self[0].pz * math.sqrt(1.0-ksi)*math.sqrt((2.0-ksi*(1.0+y))/((2.0-ksi*(1.0-y)))) + self[0].E = math.sqrt(self[0].mass**2 + self[0].pz**2) + self[1].pz = self[1].pz * math.sqrt(1.0-ksi)*math.sqrt((2.0-ksi*(1.0-y))/((2.0-ksi*(1.0+y)))) + self[1].E = math.sqrt(self[1].mass**2 + self[1].pz**2) + + final = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz]) + + k_tot_1 = k_tot.zboost(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + k_tot_2 = k_tot_1.pt_boost(pboost=FourMomentum([k_tot_1.E,k_tot_1.px,k_tot_1.py,k_tot_1.pz])) + k_tot_3 = k_tot_2.zboost_inv(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + + for ip,part in enumerate(self): + if (ip >= 2): + vec = FourMomentum([part.E,part.px,part.py,part.pz]) + vec2 = vec.zboost(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + vec3 = vec2.pt_boost(pboost=FourMomentum([k_tot_1.E,k_tot_1.px,k_tot_1.py,k_tot_1.pz])) + vec_new = vec3.zboost_inv(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + self[ip].set_momentum(FourMomentum([vec_new.E,vec_new.px,vec_new.py,vec_new.pz])) + + self.pop(to_remove) + + else: # final-state recoil + q = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz]) + + for ip,part in enumerate(self): + vec = FourMomentum([part.E,part.px,part.py,part.pz]) + self[ip].set_momentum(vec.zboost(pboost=q)) + + q = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz]) + + k = FourMomentum([self[fks_i].E+self[fks_j].E,self[fks_i].px+self[fks_j].px,\ + self[fks_i].py+self[fks_j].py,self[fks_i].pz+self[fks_j].pz]) + + k_rec = FourMomentum([0,0,0,0]) + for ip,part in enumerate(self): + if ip >= 2 and ip != fks_i and ip != fks_j: # add only final-states to the recoil and not the FKS pair + k_rec = k_rec + FourMomentum([part.E,part.px,part.py,part.pz]) + + k_mom = math.sqrt(k_rec.px**2 + k_rec.py**2 + k_rec.pz**2) + beta = (q**2 - 
(k_rec.E+k_mom)**2)/(q**2 + (k_rec.E+k_mom)**2) + for ip,part in enumerate(self): + if ip >= 2 and ip != fks_i and ip != fks_j: + vec = FourMomentum([self[ip].E,self[ip].px,self[ip].py,self[ip].pz]) + self[ip].set_momentum(vec.boost_beta(beta,k_rec)) + if ip == fks_i: + self[ip].set_momentum(q - k_rec.boost_beta(beta,k_rec)) + self.pop(to_remove) + else: + logger.info('Error in Sudakov Born mapping: no recoil scheme found!') + + def recoil_eq(self,part1, part2): + """ In general, solves the equation + E1 + E2 = K + p1 + p2 = c + E1^2 - p1^2 = a + E2^2 - p2^2 = b + and returns p1 + """ + thresh = 1e-6 + import random + a = part1.mass**2 + part1.px**2 + part1.py**2 + b = part2.mass**2 + part2.px**2 + part2.py**2 + c = part1.pz + part2.pz + K = part1.E + part2.E + K2 = K**2 + sol1 = (-a*c + b*c + c**3 - c*K2 - math.sqrt(K2*(a**2 + (b + c**2 - K2)**2 - 2*a*(b - c**2 + K2))))/(2*(c**2-K2)) + sol2 = (-a*c + b*c + c**3 - c*K2 + math.sqrt(K2*(a**2 + (b + c**2 - K2)**2 - 2*a*(b - c**2 + K2))))/(2*(c**2-K2)) + + if abs(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2) - (math.sqrt(a+sol2**2) + math.sqrt(b+(c-sol2)**2))) > thresh: + logger.critical('Error in recoil_eq solver 1') + logger.critical(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2)) + logger.critical(math.sqrt(a+sol2**2) + math.sqrt(b+(c-sol2)**2)) + if abs(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2) - K) > thresh: + logger.critical('Error in recoil_eq solver 2') + logger.critical(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2)) + logger.critical(K) + return sol1 + + def boost(self, filter=None): """modify the current event to boost it according to the current filter""" if filter is None: @@ -1861,7 +2129,7 @@ def boost(self, filter=None): if list(filter(p)): pboost += p else: - pboost = FourMomentum(pboost) + pboost = FourMomentum(filter) # change sign of three-component due to helas convention pboost.px *=-1 @@ -1877,7 +2145,7 @@ def check(self): """check various property of the events""" # check that relative error is under control - threshold = 1e-6 + threshold = 1e-4 #1. Check that the 4-momenta are conserved E, px, py, pz = 0,0,0,0 @@ -1920,7 +2188,50 @@ def check(self): self.check_color_structure() #3. check mass - + + def check_kinematics_only(self): + """check various property of the events - only kinematics""" + + # check that relative error is under control + threshold = 1e-3 + + #1. 
Check that the 4-momenta are conserved + E, px, py, pz = 0,0,0,0 + absE, abspx, abspy, abspz = 0,0,0,0 + for particle in self: + coeff = 1 + if particle.status == -1: + coeff = -1 + elif particle.status != 1: + continue + E += coeff * particle.E + absE += abs(particle.E) + px += coeff * particle.px + py += coeff * particle.py + pz += coeff * particle.pz + abspx += abs(particle.px) + abspy += abs(particle.py) + abspz += abs(particle.pz) + # check mass + fourmass = FourMomentum(particle).mass + + if particle.mass and (abs(particle.mass) - fourmass)/ abs(particle.mass) > threshold: + logger.critical(self) + raise Exception( "Do not have correct mass lhe: %s momentum: %s (error at %s" % (particle.mass, fourmass, (abs(particle.mass) - fourmass)/ abs(particle.mass))) + + if abs(E/absE) > threshold: + logger.critical(self) + raise Exception("Do not conserve Energy %s, %s" % (E/absE, E)) + if abs(px/abspx) > threshold: + logger.critical(self) + raise Exception("Do not conserve Px %s, %s" % (px/abspx, px)) + if abs(py/abspy) > threshold: + logger.critical(self) + raise Exception("Do not conserve Py %s, %s" % (py/abspy, py)) + if abs(pz/abspz) > threshold: + logger.critical(self) + raise Exception("Do not conserve Pz %s, %s" % (pz/abspz, pz)) + def assign_scale_line(self, line, convert=True): """read the line corresponding to global event line @@ -2764,7 +3075,7 @@ def zboost(self, pboost=None, E=0, pz=0): if isinstance(pboost, FourMomentum): E = pboost.E pz = pboost.pz - + #beta = pz/E gamma = E / math.sqrt(E**2-pz**2) gammabeta = pz / math.sqrt(E**2-pz**2) @@ -2778,6 +3089,74 @@ def zboost(self, pboost=None, E=0, pz=0): out.pz = 0 return out + def zboost_inv(self, pboost=None, E=0, pz=0): + """Both momenta should be in the same frame. + The boost perform correspond to the boost required to set pboost at + rest (only z boost applied). + """ + if isinstance(pboost, FourMomentum): + E = pboost.E + pz = pboost.pz + + #beta = pz/E + gamma = E / math.sqrt(E**2-pz**2) + gammabeta = pz / math.sqrt(E**2-pz**2) + + out = FourMomentum([gamma*self.E + gammabeta*self.pz, + self.px, + self.py, + gamma*self.pz + gammabeta*self.E]) + + if abs(out.pz) < 1e-6 * out.E: + out.pz = 0 + return out + + + def pt_boost(self, pboost=None, E=0, pz=0): + """Both momenta should be in the same frame. + The boost perform correspond to the boost required to set pboost at + rest (only pT boost applied). 
+ """ + + if isinstance(pboost, FourMomentum): + E = pboost.E + px = pboost.px + py = pboost.py + mass = math.sqrt(E**2 - px**2 - py**2) + + betax = px/E + betay = py/E + beta = math.sqrt(betax**2+betay**2) + gamma = 1 / math.sqrt(1.0-beta**2) + + out = FourMomentum([gamma*self.E - gamma*betax*self.px - gamma*betay*self.py, + -gamma*betax*self.E + (1.0 + (gamma-1.0)*betax**2/(beta**2))*self.px + (gamma-1.0)*betax*betay/(beta**2)*self.py, + -gamma*betay*self.E + ((gamma-1.0)*betax*betay/(beta**2))*self.px + (1.0+(gamma-1.0)*(betay**2)/(beta**2))*self.py, + self.pz]) + + if abs(out.px) < 1e-6 * out.E: + out.px = 0 + if abs(out.py) < 1e-6 * out.E: + out.py = 0 + return out + + def boost_beta(self,beta,mom): + """ Boost along the three-momentum of mom with a boost of size beta""" + + unit = mom * (1.0/math.sqrt(mom.px**2+mom.py**2+mom.pz**2)) + beta_vec = beta*unit + bx = beta_vec.px + by = beta_vec.py + bz = beta_vec.pz + gamma = 1.0 / math.sqrt(1.0-beta**2) + + out = FourMomentum([gamma*self.E - gamma*bx*self.px - gamma*by*self.py - gamma*bz*self.pz, + -gamma*bx*self.E + (1.0 + (gamma-1.0)*bx**2/(beta**2))*self.px + (gamma-1.0)*bx*by/(beta**2)*self.py + (gamma-1.0)*bx*bz/(beta**2)*self.pz, + -gamma*by*self.E + ((gamma-1.0)*bx*by/(beta**2))*self.px + (1.0+(gamma-1.0)*(by**2)/(beta**2))*self.py + (gamma-1.0)*by*bz/(beta**2)*self.pz, + -gamma*bz*self.E + (gamma-1.0)*bx*bz/(beta**2)*self.px + (gamma-1.0)*(by*bz)/(beta**2)*self.py + (1.0+(gamma-1.0)*bz**2/(beta**2))*self.pz]) + + return out + def boost_to_restframe(self, pboost): """apply the boost transformation such that pboost is at rest in the new frame. First apply a rotation to allign the pboost to the z axis and then use @@ -2789,27 +3168,64 @@ def boost_to_restframe(self, pboost): return out - # write pboost as (E, p cosT sinF, p sinT sinF, p cosF) - # rotation such that it become (E, 0 , 0 , p ) is - # cosT sinF , -sinT , cosT sinF - # sinT cosF , cosT , sinT sinF - # -sinT , 0 , cosF - p = math.sqrt( pboost.px**2 + pboost.py**2+ pboost.pz**2) - cosF = pboost.pz / p - sinF = math.sqrt(1-cosF**2) - sinT = pboost.py/p/sinF - cosT = pboost.px/p/sinF - - out=FourMomentum([self.E, - self.px*cosT*cosF + self.py*sinT*cosF-self.pz*sinF, - -self.px*sinT+ self.py*cosT, - self.px*cosT*sinF + self.py*sinT*sinF + self.pz*cosF - ]) - out = out.zboost(E=pboost.E,pz=p) + # see here https://physics.stackexchange.com/questions/749036/general-lorentz-boost-of-four-momentum-in-cm-frame-particle-physics + vx = pboost.px/pboost.E + vy = pboost.py/pboost.E + vz = pboost.pz/pboost.E + v = pboost.norm/pboost.E + v2 = pboost.norm_sq/pboost.E**2 + gamma = 1./math.sqrt(1.-v**2) + gammo = gamma-1. 
+ out = FourMomentum(E = gamma*(self.E - vx*self.px - vy*self.py - vz*self.pz), + px= -gamma*vx*self.E + (1+gammo*vx**2/v2)*self.px + gammo*vx*vy/v2*self.py + gammo*vx*vz/v2*self.pz, + py= -gamma*vy*self.E + gammo*vy*vx/v2*self.px + (1+gammo*vy**2/v2)*self.py + gammo*vy*vz/v2*self.pz, + pz= -gamma*vz*self.E + gammo*vz*vx/v2*self.px + gammo*vz*vy/v2*self.py + (1+gammo*vz**2/v2)*self.pz) + return out + def rotate_to_z(self,prot): + + import math + import numpy as np + + z = np.array([0.,0.,1.]) + + px = self.px + py = self.py + pz = self.pz + + refx = prot.px + refy = prot.py + refz = prot.pz + + prot_mom = np.array([px, py, pz]) + ref_mom = np.array([refx, refy, refz]) + + # Create normal vector + n = np.array([refy, -refx, 0.]) + n = n * 1./math.sqrt(self.threedot(n,n)) + t = prot_mom - self.threedot(n,prot_mom)*n + p = ref_mom - self.threedot(ref_mom,z)*z + p = p/math.sqrt(self.threedot(p,p)) + + t_pz = np.array([self.threedot(t,p), self.threedot(t,z), 0.]) + costheta = self.threedot(ref_mom,z)* 1./math.sqrt(self.threedot(ref_mom, ref_mom)) + sintheta=math.sqrt(1.-costheta**2) + + sgn = 1. + t_pz_p = np.array([0., 0., 0.]) + t_pz_p[0] = costheta*t_pz[0] + sgn*(-sintheta) * t_pz[1] + t_pz_p[1] = sgn*sintheta*t_pz[0] + costheta * t_pz[1] + + out_mom = self.threedot(n,prot_mom)*n + t_pz_p[0]*p + t_pz_p[1]*z + + out = FourMomentum([self.E,out_mom[0], out_mom[1], out_mom[2] ] ) + + return out - + def threedot(self,a,b): + + return a[0]*b[0]+a[1]*b[1]+a[2]*b[2] class OneNLOWeight(object): diff --git a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/madevent_interface.py b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/madevent_interface.py index 2a118e21bf..8e30cf690c 100755 --- a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/madevent_interface.py +++ b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/madevent_interface.py @@ -496,7 +496,6 @@ def help_remove(self): logger.info(" the optional '-f' allows to by-pass all security question") logger.info(" The banner can be remove only if all files are removed first.") - class AskRun(cmd.ControlSwitch): """a class for the question on what to do on a madevent run""" @@ -2393,13 +2392,17 @@ def do_generate_events(self, line): # Check argument's validity mode = self.check_generate_events(args) switch_mode = self.ask_run_configuration(mode, args) - if not args: - # No run name assigned -> assigned one automaticaly - self.set_run_name(self.find_available_run_name(self.me_dir), None, 'parton') - else: - self.set_run_name(args[0], None, 'parton', True) - args.pop(0) - + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + run_card = banner_mod.RunCard(pjoin(self.me_dir, 'Cards', 'run_card.dat'), consistency=False) + if not run_card.scan_set: + if not args: + # No run name assigned -> assigned one automaticaly + self.set_run_name(self.find_available_run_name(self.me_dir), None, 'parton') + else: + self.set_run_name(args[0], None, 'parton', True) + args.pop(0) + + self.run_generate_events(switch_mode, args) self.postprocessing() @@ -2560,7 +2563,7 @@ def wait_monitoring(Idle, Running, Done): self.update_status("postprocessing contur done", level="rivet") # this decorator handle the loop related to scan. 
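Taken together with the do_generate_events hunk above (which now probes the run card with allow_scan enabled before fixing a run name) and the decorator change just below, generation is routed through the scan_over_run_card helper shown earlier in this patch: one full run per scan point, with the original card restored afterwards. A simplified sketch of that control flow, assuming toy stand-ins (scan_handler, ToyCard, ToyIterator) rather than the real MadGraph classes:

    import copy
    import functools

    def scan_handler(make_iterator):
        """Rough analogue of scanparamcardhandling(run_card_scan=True)."""
        def decorator(fct):
            @functools.wraps(fct)
            def new_fct(obj, *args, **opts):
                iterator = make_iterator(obj)
                if not iterator.scan_set:           # no scan ranges: run once
                    return fct(obj, *args, **opts)
                original = copy.deepcopy(iterator.card)
                try:
                    for point, card in enumerate(iterator):
                        card.write(iterator.path)   # materialise this scan point
                        fct(obj, *args, **opts)     # one full generation
                        iterator.store_entry(point, obj.results[-1])
                finally:
                    original.write(iterator.path)   # always restore the input card
            return new_fct
        return decorator

    class ToyCard(dict):
        def write(self, path):
            pass                                    # a real RunCard serialises here

    class ToyIterator:
        """Loose stand-in for RunCardIterator: one card per scan value."""
        def __init__(self, path, values):
            self.path, self.scan_set = path, bool(values)
            self.card = ToyCard(nevents=values[0] if values else 0)
            self.values, self.stored = values, {}
        def __iter__(self):
            return (ToyCard(nevents=v) for v in self.values)
        def store_entry(self, point, value):
            self.stored[point] = value              # the real one records results

    class Job:
        results = []

    @scan_handler(lambda obj: ToyIterator('run_card.dat', [1000, 2000]))
    def generate(obj):
        obj.results.append(len(obj.results))        # pretend to generate events

    generate(Job())
    assert Job.results == [0, 1]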
- @common_run.scanparamcardhandling() + @common_run.scanparamcardhandling(run_card_scan=True) def run_generate_events(self, switch_mode, args): if self.proc_characteristics['loop_induced'] and self.options['run_mode']==0: @@ -2593,7 +2596,6 @@ def run_generate_events(self, switch_mode, args): # Regular run mode logger.info('Generating %s events with run name %s' % (self.run_card['nevents'], self.run_name)) - self.exec_cmd('survey %s %s' % (self.run_name,' '.join(args)), postcmd=False) nb_event = self.run_card['nevents'] @@ -2975,7 +2977,7 @@ def update_width_in_param_card(decay_info, initial=None, output=None): particle = 0 # Read BRs for this decay line = param_card[line_number] - while re.search('^(#|\s|\d)', line): + while re.search(r'^(#|\s|\d)', line): line = param_card.pop(line_number) if not particle or line.startswith('#'): line=param_card[line_number] @@ -3226,7 +3228,7 @@ def do_treatcards(self, line, mode=None, opt=None): for line in open(pjoin(bias_module_path,'%s.f'%os.path.basename(bias_module_path))): if start and last: break - if not start and not re.search('c\s*parameters\s*=\s*{',line, re.I): + if not start and not re.search(r'c\s*parameters\s*=\s*{',line, re.I): continue start = True if not line.startswith('C'): @@ -3235,7 +3237,7 @@ def do_treatcards(self, line, mode=None, opt=None): if '{' in line: line = line.split('{')[-1] # split for } ! # - split_result = re.split('(\}|!|\#)', line,1, re.M) + split_result = re.split(r'(\}|!|\#)', line,1, re.M) line = split_result[0] sep = split_result[1] if len(split_result)>1 else None if sep == '}': @@ -3514,8 +3516,8 @@ def pass_in_difficult_integration_mode(self, rate=1): text = open(conf_path).read() min_evt, max_evt = 2500 *(2+rate), 10000*(rate+1) - text = re.sub('''\(min_events = \d+\)''', '(min_events = %i )' % min_evt, text) - text = re.sub('''\(max_events = \d+\)''', '(max_events = %i )' % max_evt, text) + text = re.sub(r'''\(min_events = \d+\)''', '(min_events = %i )' % min_evt, text) + text = re.sub(r'''\(max_events = \d+\)''', '(max_events = %i )' % max_evt, text) fsock = open(conf_path, 'w') fsock.write(text) fsock.close() @@ -3619,7 +3621,7 @@ def do_refine(self, line): alljobs = misc.glob('ajob*', Pdir) #remove associated results.dat (ensure to not mix with all data) - Gre = re.compile("\s*j=(G[\d\.\w]+)") + Gre = re.compile(r"\s*j=(G[\d\.\w]+)") for job in alljobs: Gdirs = Gre.findall(open(job).read()) for Gdir in Gdirs: @@ -3727,58 +3729,126 @@ def do_combine_events(self, line): sum_xsec, sum_xerru, sum_axsec = 0,[],0 Gdirs = self.get_Gdir() Gdirs.sort() - for Gdir in Gdirs: - if os.path.exists(pjoin(Gdir, 'events.lhe')): - result = sum_html.OneResult('') - result.read_results(pjoin(Gdir, 'results.dat')) - sum_xsec += result.get('xsec') - sum_xerru.append(result.get('xerru')) - sum_axsec += result.get('axsec') - - if self.run_card['gridpack'] or self.run_card['nevents']==0: - os.remove(pjoin(Gdir, 'events.lhe')) - continue + partials_info = [] + try: + p = subprocess.Popen(["ulimit", "-n"], stdout=subprocess.PIPE) + out, err = p.communicate() + max_G = out.decode() + if max_G == "unlimited": + max_G =2500 + else: + max_G = int(max_G) - 40 + except Exception as error: + logger.debug(error) + max_G = 80 # max(20, len(Gdirs)/self.options['nb_core']) - AllEvent.add(pjoin(Gdir, 'events.lhe'), - result.get('xsec'), - result.get('xerru'), - result.get('axsec') - ) - - if len(AllEvent) >= 80: #perform a partial unweighting - AllEvent.unweight(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % partials), - 
get_wgt, log_level=5, trunc_error=1e-2, event_target=self.run_card['nevents']) - AllEvent = lhe_parser.MultiEventFile() - AllEvent.banner = self.banner - AllEvent.add(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % partials), - sum_xsec, - math.sqrt(sum(x**2 for x in sum_xerru)), - sum_axsec) - partials +=1 - if not hasattr(self,'proc_characteristic'): self.proc_characteristic = self.get_characteristics() - if len(AllEvent) == 0: - nb_event = 0 - else: + mycluster = cluster.MultiCore(nb_core=self.options['nb_core']) + + def split(a, n): + """split a list "a" into n chunk of same size (or nearly same size)""" + k, m = divmod(len(a), n) + return (a[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n)) + + partials_info = [] + if len(Gdirs) >= max_G: + start_unweight= time.perf_counter() + # first check in how many chunk we have to split (always use a multiple of nb_core) + nb_split = 1 + nb_G = len(Gdirs) // (2* self.options['nb_core']) + while nb_G > min(80, max_G): + nb_split += 1 + nb_G = len(Gdirs)//(nb_split*2*self.options['nb_core']) + if nb_G < 10: + nb_split -=1 + nb_G = len(Gdirs)//(nb_split*2*self.options['nb_core']) + + #enforce at least 10 directory per thread + if nb_G > 10 or nb_split>1: + # do the unweighting of each chunk on their own thread + nb_chunk = (nb_split*2*self.options['nb_core']) + else: + nb_chunk = len(Gdirs) // 10 + nb_G =10 + + # security that the number of combine events is too large + if nb_chunk >= max_G: + nb_chunk = max_G -1 + nb_G = len(Gdirs) // nb_chunk + + for i, local_G in enumerate(split(Gdirs, nb_chunk)): + line = [pjoin(self.me_dir, "Events", self.run_name, "partials%d.lhe.gz" % i)] + line.append(pjoin(self.me_dir, 'Events', self.run_name, '%s_%s_banner.txt' % (self.run_name, tag))) + line.append(str(self.results.current['cross'])) + line += local_G + partials_info.append(self.do_combine_events_partial(' '.join(line), preprocess_only=True)) + mycluster.submit(sys.executable, + [pjoin(self.me_dir, 'bin', 'internal', 'madevent_interface.py'), 'combine_events_partial'] + line, + stdout='/dev/null' + ) + + starttime = time.time() + update_status = lambda idle, run, finish: \ + self.update_status((idle, run, finish, 'unweight'), level=None, + force=False, starttime=starttime) + mycluster.wait(self.me_dir, update_status) + # do the final combination + for data in partials_info: + AllEvent.add(*data) + + start_unweight= time.perf_counter() nb_event = AllEvent.unweight(pjoin(self.me_dir, "Events", self.run_name, "unweighted_events.lhe.gz"), get_wgt, trunc_error=1e-2, event_target=self.run_card['nevents'], log_level=logging.DEBUG, normalization=self.run_card['event_norm'], proc_charac=self.proc_characteristic) + + #cleaning + for data in partials_info: + path = data[0] + try: + os.remove(path) + except Exception as error: + try: + os.remove(path[:-3]) # try without the .gz + except: + misc.sprint('no file ', path, 'to clean') + else: + for Gdir in Gdirs: + if os.path.exists(pjoin(Gdir, 'events.lhe')): + result = sum_html.OneResult('') + result.read_results(pjoin(Gdir, 'results.dat')) + sum_xsec += result.get('xsec') + sum_xerru.append(result.get('xerru')) + sum_axsec += result.get('axsec') + + if self.run_card['gridpack'] or self.run_card['nevents']==0: + os.remove(pjoin(Gdir, 'events.lhe')) + continue + + AllEvent.add(pjoin(Gdir, 'events.lhe'), + result.get('xsec'), + result.get('xerru'), + result.get('axsec') + ) + + if len(AllEvent) == 0: + nb_event = 0 + else: + nb_event = AllEvent.unweight(pjoin(self.me_dir, "Events", self.run_name, 
"unweighted_events.lhe.gz"), + get_wgt, trunc_error=1e-2, event_target=self.run_card['nevents'], + log_level=logging.DEBUG, normalization=self.run_card['event_norm'], + proc_charac=self.proc_characteristic) + + if nb_event < self.run_card['nevents']: + logger.warning("failed to generate enough events. Please follow one of the following suggestions to fix the issue:") + logger.warning(" - set in the run_card.dat 'sde_strategy' to %s", 1 + self.run_card['sde_strategy'] % 2) + logger.warning(" - set in the run_card.dat 'hard_survey' to 1 or 2.") + logger.warning(" - reduce the number of requested events (if set too high)") + logger.warning(" - check that you do not have -integrable- singularity in your amplitude.") + logger.warning(" - regenerate your process directory by selecting another gauge (in particular try FD gauge).") - if nb_event < self.run_card['nevents']: - logger.warning("failed to generate enough events. Please follow one of the following suggestions to fix the issue:") - logger.warning(" - set in the run_card.dat 'sde_strategy' to %s", 1 + self.run_card['sde_strategy'] % 2) - logger.warning(" - set in the run_card.dat 'hard_survey' to 1 or 2.") - logger.warning(" - reduce the number of requested events (if set too high)") - logger.warning(" - check that you do not have -integrable- singularity in your amplitude.") - if partials: - for i in range(partials): - try: - os.remove(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % i)) - except Exception: - os.remove(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe" % i)) self.results.add_detail('nb_event', nb_event) @@ -3789,7 +3859,50 @@ def do_combine_events(self, line): logger.info("combination of events done in %s s ", time.time()-start) self.to_store.append('event') + + ############################################################################ + def do_combine_events_partial(self, line, preprocess_only=False): + """ """ + + AllEvent = lhe_parser.MultiEventFile() + + sum_xsec, sum_xerru, sum_axsec = 0,[],0 + output, banner_path,cross = line.split()[:3] + Gdirs = line.split()[3:] + + cross = float(cross) + if not self.banner: + self.banner = banner_mod.Banner(banner_path) + if not hasattr(self, 'run_card'): + self.run_card = banner_mod.RunCard(self.banner['mgruncard']) + AllEvent.banner = self.banner + + for Gdir in Gdirs: + if os.path.exists(pjoin(Gdir, 'events.lhe')): + result = sum_html.OneResult('') + result.read_results(pjoin(Gdir, 'results.dat')) + sum_xsec += result.get('xsec') + sum_xerru.append(result.get('xerru')) + sum_axsec += result.get('axsec') + + if self.run_card['gridpack'] or self.run_card['nevents']==0: + os.remove(pjoin(Gdir, 'events.lhe')) + continue + if not preprocess_only: + AllEvent.add(pjoin(Gdir, 'events.lhe'), + result.get('xsec'), + result.get('xerru'), + result.get('axsec') + ) + if preprocess_only: + return output, sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), sum_axsec + nb_event = max(min(abs(1.01*self.run_card['nevents']*sum_axsec/cross),self.run_card['nevents']), 10) + get_wgt = lambda event: event.wgt + AllEvent.unweight(output, + get_wgt, log_level=5, trunc_error=1e-2, event_target=nb_event) + return output, sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), sum_axsec + ############################################################################ def correct_bias(self): """check the first event and correct the weight by the bias @@ -3902,13 +4015,19 @@ def do_store_events(self, line): #except Exception: # continue # Store log - try: - if 
os.path.exists(pjoin(G_path, 'log.txt')): - input = pjoin(G_path, 'log.txt') + input = pjoin(G_path, 'log.txt') + if os.path.exists(input): + if self.run_card['keep_log'] not in ["none", "minimal"]: output = pjoin(G_path, '%s_log.txt' % run) - files.mv(input, output) - except Exception: - continue + try: + files.mv(input, output) + except Exception: + continue + elif self.run_card['keep_log'] == "none": + try: + os.remove(input) + except Exception: + continue #try: # # Grid # for name in ['ftn26']: @@ -3989,7 +4108,7 @@ def do_create_gridpack(self, line): misc.call(['./bin/internal/make_gridpack'], cwd=self.me_dir) files.mv(pjoin(self.me_dir, 'gridpack.tar.gz'), pjoin(self.me_dir, '%s_gridpack.tar.gz' % self.run_name)) - os.system("sed -i.bak \"s/\s*.true.*=.*GridRun/ .false. = GridRun/g\" %s/Cards/grid_card.dat" \ + os.system("sed -i.bak \"s/\\s*.true.*=.*GridRun/ .false. = GridRun/g\" %s/Cards/grid_card.dat" \ % self.me_dir) self.update_status('gridpack created', level='gridpack') @@ -4476,7 +4595,7 @@ def do_pythia8(self, line): else: preamble = misc.get_HEPTools_location_setter( pjoin(MG5DIR,'HEPTools'),'lib') - preamble += "\n unset PYTHIA8DATA\n" + #preamble += "\n unset PYTHIA8DATA\n" open(pythia_cmd_card,'w').write("""! ! It is possible to run this card manually with: @@ -4691,7 +4810,7 @@ def do_pythia8(self, line): # Make sure to sure the number of split_events determined during the splitting. split_PY8_Card.systemSet('Main:numberOfEvents',partition_for_PY8[i]) split_PY8_Card.systemSet('HEPMCoutput:scaling',split_PY8_Card['HEPMCoutput:scaling']* - (float(partition_for_PY8[i])/float(n_events))) + (float(partition_for_PY8[i]))) # Add_missing set to False so as to be sure not to add any additional parameter w.r.t # the ones in the original PY8 param_card copied. split_PY8_Card.write(pjoin(parallelization_dir,'PY8Card_%d.dat'%i), @@ -4963,9 +5082,9 @@ def wait_monitoring(Idle, Running, Done): if cross_sections: # Filter the cross_sections specified an keep only the ones # with central parameters and a different merging scale - a_float_re = '[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' + a_float_re = r'[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' central_merging_re = re.compile( - '^\s*Weight_MERGING\s*=\s*(?P%s)\s*$'%a_float_re, + r'^\s*Weight_MERGING\s*=\s*(?P%s)\s*$'%a_float_re, re.IGNORECASE) cross_sections = dict( (float(central_merging_re.match(xsec).group('merging')),value) @@ -5016,8 +5135,8 @@ def wait_monitoring(Idle, Running, Done): def parse_PY8_log_file(self, log_file_path): """ Parse a log file to extract number of event and cross-section. 
""" - pythiare = re.compile("Les Houches User Process\(es\)\s*\d+\s*\|\s*(?P\d+)\s*(?P\d+)\s*(?P\d+)\s*\|\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") - pythia_xsec_re = re.compile("Inclusive cross section\s*:\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") + pythiare = re.compile(r"Les Houches User Process\(es\)\s*\d+\s*\|\s*(?P\d+)\s*(?P\d+)\s*(?P\d+)\s*\|\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") + pythia_xsec_re = re.compile(r"Inclusive cross section\s*:\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") sigma_m, Nacc, Ntry = None, None, None for line in misc.BackRead(log_file_path): info = pythiare.search(line) @@ -5158,7 +5277,7 @@ def do_pythia(self, line): # read the line from the bottom of the file #pythia_log = misc.BackRead(pjoin(self.me_dir,'Events', self.run_name, # '%s_pythia.log' % tag)) - pythiare = re.compile("\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") + pythiare = re.compile(r"\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") for line in misc.reverse_readline(pjoin(self.me_dir,'Events', self.run_name, '%s_pythia.log' % tag)): info = pythiare.search(line) @@ -5619,8 +5738,8 @@ def launch_job(self,exe, cwd=None, stdout=None, argument = [], remaining=0, input_files.append(self.get_pdf_input_filename()) #Find the correct ajob - Gre = re.compile("\s*j=(G[\d\.\w]+)") - origre = re.compile("grid_directory=(G[\d\.\w]+)") + Gre = re.compile(r"\s*j=(G[\d\.\w]+)") + origre = re.compile(r"grid_directory=(G[\d\.\w]+)") try : fsock = open(exe) except Exception: @@ -5628,7 +5747,7 @@ def launch_job(self,exe, cwd=None, stdout=None, argument = [], remaining=0, text = fsock.read() output_files = Gre.findall(text) if not output_files: - Ire = re.compile("for i in ([\d\.\s]*) ; do") + Ire = re.compile(r"for i in ([\d\.\s]*) ; do") data = Ire.findall(text) data = ' '.join(data).split() for nb in data: @@ -6035,7 +6154,7 @@ def get_last_tag(self, level): 'syscalc':[], 'rivet':['rivet']} - if name == self.run_name: + if name and name == self.run_name: if reload_card: run_card = pjoin(self.me_dir, 'Cards','run_card.dat') self.run_card = banner_mod.RunCard(run_card) @@ -6334,7 +6453,7 @@ def run_syscalc(self, mode='parton', event_path=None, output=None): elif mode == 'Pythia': stdout = open(pjoin(event_dir, self.run_name, '%s_%s_syscalc.log' % (tag,mode)),'w') if 'mgpythiacard' in self.banner: - pat = re.compile('''^\s*qcut\s*=\s*([\+\-\d.e]*)''', re.M+re.I) + pat = re.compile(r'''^\s*qcut\s*=\s*([\+\-\d.e]*)''', re.M+re.I) data = pat.search(self.banner['mgpythiacard']) if data: qcut = float(data.group(1)) @@ -6611,7 +6730,7 @@ def get_subP_ids(path): for line in open(pjoin(path, 'leshouche.inc')): if not 'IDUP' in line: continue - particles = re.search("/([\d,-]+)/", line) + particles = re.search(r"/([\d,-]+)/", line) all_ids.append([int(p) for p in particles.group(1).split(',')]) return all_ids @@ -6899,6 +7018,7 @@ def do_combine_events(self, line): partials = 0 # if too many file make some partial unweighting sum_xsec, sum_xerru, sum_axsec = 0,[],0 + partials_info = [] Gdirs = self.get_Gdir() Gdirs.sort() for Gdir in Gdirs: @@ -6917,16 +7037,21 @@ def do_combine_events(self, line): sum_axsec += result.get('axsec')*gscalefact[Gdir] if len(AllEvent) >= 80: #perform a partial unweighting + nb_event = min(abs(1.01*self.nb_event*sum_axsec/self.results.current['cross']),self.run_card['nevents']) AllEvent.unweight(pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), - get_wgt, log_level=5, trunc_error=1e-2, event_target=self.nb_event) + 
get_wgt, log_level=5, trunc_error=1e-2, event_target=nb_event) AllEvent = lhe_parser.MultiEventFile() AllEvent.banner = self.banner - AllEvent.add(pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), + partials_info.append((pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), - sum_axsec) + sum_axsec) ) + sum_xsec, sum_xerru, sum_axsec = 0,[],0 partials +=1 + for data in partials_info: + AllEvent.add(*data) + if not hasattr(self,'proc_characteristic'): self.proc_characteristic = self.get_characteristics() diff --git a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/misc.py b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/misc.py index c4c669f36b..e7fd60be0d 100755 --- a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/misc.py +++ b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/misc.py @@ -67,7 +67,7 @@ def parse_info_str(fsock): """ info_dict = {} - pattern = re.compile("(?P\w*)\s*=\s*(?P.*)", + pattern = re.compile(r"(?P\w*)\s*=\s*(?P.*)", re.IGNORECASE | re.VERBOSE) for entry in fsock: entry = entry.strip() @@ -84,7 +84,7 @@ def parse_info_str(fsock): def glob(name, path=''): """call to glob.glob with automatic security on path""" import glob as glob_module - path = re.sub('(?P\?|\*|\[|\])', '[\g]', path) + path = re.sub(r'(?P\?|\*|\[|\])', r'[\g]', path) return glob_module.glob(pjoin(path, name)) #=============================================================================== @@ -614,10 +614,10 @@ def mod_compilator(directory, new='gfortran', current=None, compiler_type='gfort #search file file_to_change=find_makefile_in_dir(directory) if compiler_type == 'gfortran': - comp_re = re.compile('^(\s*)FC\s*=\s*(.+)\s*$') + comp_re = re.compile(r'^(\s*)FC\s*=\s*(.+)\s*$') var = 'FC' elif compiler_type == 'cpp': - comp_re = re.compile('^(\s*)CXX\s*=\s*(.+)\s*$') + comp_re = re.compile(r'^(\s*)CXX\s*=\s*(.+)\s*$') var = 'CXX' else: MadGraph5Error, 'Unknown compiler type: %s' % compiler_type @@ -861,9 +861,9 @@ def detect_current_compiler(path, compiler_type='fortran'): # comp = re.compile("^\s*FC\s*=\s*(\w+)\s*") # The regular expression below allows for compiler definition with absolute path if compiler_type == 'fortran': - comp = re.compile("^\s*FC\s*=\s*([\w\/\\.\-]+)\s*") + comp = re.compile("^\\s*FC\\s*=\\s*([\\w\\/\\.\\-]+)\\s*") elif compiler_type == 'cpp': - comp = re.compile("^\s*CXX\s*=\s*([\w\/\\.\-]+)\s*") + comp = re.compile("^\\s*CXX\\s*=\\s*([\\w\\/\\.\\-]+)\\s*") else: MadGraph5Error, 'Unknown compiler type: %s' % compiler_type @@ -1001,7 +1001,19 @@ def call_stdout(arg, *args, **opt): def copytree(src, dst, symlinks = False, ignore = None): if not os.path.exists(dst): os.makedirs(dst) - shutil.copystat(src, dst) + try: + shutil.copystat(src, dst) + except PermissionError: + if os.path.realpath(src).startswith('/cvmfs') and os.path.realpath(dst).startswith('/afs'): + # allowing mismatch from cvmfs to afs since it does not seem to create issues --at least in general-- + logger.critical(f'Ignoring that we could not copy permissions from {src} to {dst}') + else: + logger.critical(f'Permission error detected from {src} to {dst}.\n'+\ 'If you are using WSL with windows partition, please try using python3.12\n'+\ 'or avoid moving your data from the WSL partition to the UNIX one') + # we do not have enough experience in WSL to allow it to get through. + raise + lst = os.listdir(src) if ignore: excl = ignore(src, lst) @@ -1895,12 +1907,12 @@ class EasterEgg(object): May4_banner = "* _____ *\n" + \ "* ,-~\" \"~-. *\n" + \ "* * ,^ ___ ^. 
* *\n" + \ - "* * / .^ ^. \ * *\n" + \ + "* * / .^ ^. \\ * *\n" + \ "* * Y l o ! Y * *\n" + \ "* * l_ `.___.' _,[ * *\n" + \ "* * |^~\"--------------~\"\"^| * *\n" + \ "* * ! May the 4th ! * *\n" + \ - "* * \ / * *\n" + \ + "* * \\ / * *\n" + \ "* * ^. .^ * *\n" + \ "* * \"-.._____.,-\" * *\n" @@ -1909,13 +1921,13 @@ class EasterEgg(object): "* M::::::::::M M::::::::::M *\n" + \ "* M:::::::::::M M:::::::::::M (_)___ *\n" + \ "* M:::::::M::::M M::::M:::::::M | / __| *\n" + \ - "* M::::::M M::::M M::::M M::::::M | \__ \ *\n" + \ + "* M::::::M M::::M M::::M M::::::M | \\__ \\ *\n" + \ "* M::::::M M::::M::::M M::::::M |_|___/ *\n" + \ "* M::::::M M:::::::M M::::::M *\n" + \ "* M::::::M M:::::M M::::::M / _| ___ _ __ *\n" + \ - "* M::::::M MMMMM M::::::M | |_ / _ \| '__| *\n" + \ + "* M::::::M MMMMM M::::::M | |_ / _ \\| '__| *\n" + \ "* M::::::M M::::::M | _| (_) | | *\n" + \ - "* M::::::M M::::::M |_/\/\___/|_| *\n" + \ + "* M::::::M M::::::M |_/\\/\\___/|_| *\n" + \ "* M::::::M M::::::M *\n" + \ "* MMMMMMMM MMMMMMMM *\n" + \ "* *\n" + \ @@ -2233,39 +2245,51 @@ def import_python_lhapdf(lhapdfconfig): os.environ['LD_LIBRARY_PATH'] = lhapdf_libdir else: os.environ['LD_LIBRARY_PATH'] = '%s:%s' %(lhapdf_libdir,os.environ['LD_LIBRARY_PATH']) - try: candidates=[dirname for dirname in os.listdir(lhapdf_libdir) \ if os.path.isdir(os.path.join(lhapdf_libdir,dirname))] except OSError: candidates=[] + if os.path.isdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')): + candidates += [pjoin(os.pardir,'local', 'lib', dirname) for dirname in os.listdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')) + if os.path.isdir(os.path.join(lhapdf_libdir,os.pardir, 'local', 'lib', dirname))] for candidate in candidates: - if os.path.isdir(os.path.join(lhapdf_libdir,candidate,'site-packages')): - sys.path.insert(0,os.path.join(lhapdf_libdir,candidate,'site-packages')) - try: - import lhapdf - use_lhapdf=True - break - except ImportError: - sys.path.pop(0) - continue + for subdir in ['site-packages', 'dist-packages']: + if os.path.isdir(os.path.join(lhapdf_libdir,candidate, subdir)): + sys.path.insert(0,os.path.join(lhapdf_libdir,candidate, subdir)) + try: + import lhapdf + use_lhapdf=True + break + except ImportError as error: + sys.path.pop(0) + continue + else: + continue + break if not use_lhapdf: try: candidates=[dirname for dirname in os.listdir(lhapdf_libdir+'64') \ if os.path.isdir(os.path.join(lhapdf_libdir+'64',dirname))] except OSError: candidates=[] - + if os.path.isdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib64')): + candidates += [pjoin(os.pardir,'local', 'lib64', dirname) for dirname in os.listdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')) + if os.path.isdir(os.path.join(lhapdf_libdir,os.pardir, 'local', 'lib64', dirname))] for candidate in candidates: - if os.path.isdir(os.path.join(lhapdf_libdir+'64',candidate,'site-packages')): - sys.path.insert(0,os.path.join(lhapdf_libdir+'64',candidate,'site-packages')) - try: - import lhapdf - use_lhapdf=True - break - except ImportError: - sys.path.pop(0) - continue + for subdir in ['site-packages', 'dist-packages']: + if os.path.isdir(os.path.join(lhapdf_libdir+'64',candidate, subdir)): + sys.path.insert(0,os.path.join(lhapdf_libdir+'64',candidate, subdir)) + try: + import lhapdf + use_lhapdf=True + break + except ImportError as error: + sys.path.pop(0) + continue + else: + continue + break if not use_lhapdf: try: import lhapdf diff --git a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/shower_card.py 
b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/shower_card.py index e87d534177..16ed72b1a0 100755 --- a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/shower_card.py +++ b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/shower_card.py @@ -288,7 +288,7 @@ def write(self, output_file, template=None, python_template=False, self.text = open(template,'r').read() - key_re = re.compile('^(\s*)([\S^#]+)(\s*)=(\s*)([^#]*?)(\s*)(\#.*|$)') + key_re = re.compile(r'^(\s*)([\S^#]+)(\s*)=(\s*)([^#]*?)(\s*)(\#.*|$)') newlines = [] for line in self.text.split('\n'): key_match = key_re.findall(line) diff --git a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/systematics.py b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/systematics.py index 28eaed00e2..2acebd087c 100644 --- a/epochX/cudacpp/heft_gg_bb.mad/bin/internal/systematics.py +++ b/epochX/cudacpp/heft_gg_bb.mad/bin/internal/systematics.py @@ -582,7 +582,7 @@ def print_cross_sections(self, all_cross, nb_event, stdout): else: resume.write( '#PDF %s: %g +%2.3g%% -%2.3g%%\n' % (pdfset.name, pdferr.central,pdferr.errplus*100/all_cross[0], pdferr.errminus*100/all_cross[0])) - dyn_name = {1: '\sum ET', 2:'\sum\sqrt{m^2+pt^2}', 3:'0.5 \sum\sqrt{m^2+pt^2}',4:'\sqrt{\hat s}' } + dyn_name = {1: r'\sum ET', 2:r'\sum\sqrt{m^2+pt^2}', 3:r'0.5 \sum\sqrt{m^2+pt^2}',4:r'\sqrt{\hat s}' } for key, curr in dyns.items(): if key ==-1: continue @@ -789,7 +789,7 @@ def get_id(self): return int(self.start_wgt_id) if 'initrwgt' in self.banner: - pattern = re.compile(' set lhapdf /PATH/TO/lhapdf-config -None does not seem to correspond to a valid lhapdf-config executable. -Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). -Note that you can still compile and run aMC@NLO with the built-in PDFs - MG5_aMC> set lhapdf /PATH/TO/lhapdf-config - Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 set zerowidth_tchannel F set auto_convert_model T save options auto_convert_model -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt import model heft +INFO: load particles +INFO: load vertices +WARNING: coupling GC_13=-(complex(0,1)*GH) has direct dependence in aS but has QCD order set to 0. Automatic computation of scale uncertainty can be wrong for such model.  +WARNING: coupling GC_16=(complex(0,1)*Gphi)/8. has direct dependence in aS but has QCD order set to 0. Automatic computation of scale uncertainty can be wrong for such model.  +DEBUG: model prefixing takes 0.006150245666503906  INFO: Restrict model heft with file models/heft/restrict_default.dat . 
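A note on the recurring regex change in the bin/internal script diffs above: every re.compile and re.sub literal gains an r prefix. Python has treated unrecognized escape sequences such as \s in plain string literals as deprecated since 3.6, and 3.12 promotes them to a visible SyntaxWarning, so raw strings keep each compiled pattern byte-identical while silencing the warning. A minimal standalone illustration (not MG5 code, names hypothetical):

import re

# '\s' is not a valid string escape; in a plain literal Python keeps the
# two characters but warns (a SyntaxWarning as of Python 3.12).
plain = re.compile('^\\s*FC\\s*=')  # explicit escaping, always safe
raw = re.compile(r'^\s*FC\s*=')     # raw string: same pattern, no warning

assert plain.pattern == raw.pattern
print(raw.match('  FC = gfortran').group(0))  # prints '  FC ='

Both forms compile to the same pattern, which is why these diffs change no runtime behaviour.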
DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: s u w+ at order: QED=1  @@ -131,13 +131,12 @@ INFO: Process has 4 diagrams Total: 1 processes with 4 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_heft_gg_bb Load PLUGIN.CUDACPP_OUTPUT -Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.5.3_lo_vect. +Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT -DEBUG: cformat =  plugin [export_cpp.py at line 3070]  DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 165]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > b b~ HIG<=1 HIW<=1 @1 INFO: Processing color information for process: g g > b b~ HIG<=1 HIW<=1 @1 @@ -146,34 +145,34 @@ INFO: Processing color information for process: g g > b b~ HIG<=1 HIW<=1 @1 DEBUG: type(fortran_model)= [output.py at line 214]  DEBUG: type(me)= me=0 [output.py at line 215]  DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 216]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/. 
+INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/SubProcesses/P1_Sigma_heft_gg_bbx/. Generated helas calls for 1 subprocesses (4 diagrams) in 0.008 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVS3 routines ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFS2 routines -ALOHA: aloha creates 4 routines in 0.266 s +ALOHA: aloha creates 4 routines in 0.265 s VVS3 VVV1 FFV1 FFV1 FFV1 FFS2 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/./HelAmps_heft.h -INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/./HelAmps_heft.h +INFO: Created file HelAmps_heft.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/./Parameters_heft.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/./Parameters_heft.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/./Parameters_heft.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/./Parameters_heft.cc INFO: Created files Parameters_heft.h and Parameters_heft.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_heft_gg_bb/src/. quit -real 0m0.741s -user 0m0.588s -sys 0m0.060s -Code generation completed in 1 seconds +real 0m0.668s +user 0m0.604s +sys 0m0.052s +Code generation completed in 0 seconds diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt b/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt index b115d02584..f536ab73a6 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt +++ b/epochX/cudacpp/nobm_pp_ttW.mad/CODEGEN_mad_nobm_pp_ttW_log.txt @@ -14,7 +14,7 @@ Running MG5 in debug mode * * * * * * * * * * * * -* VERSION 3.5.3_lo_vect 2023-12-23 * +* VERSION 3.6.0_lo_vect 2024-06-17 * * * * WARNING: UNKNOWN DEVELOPMENT VERSION. * * WARNING: DO NOT USE FOR PRODUCTION * @@ -45,15 +45,10 @@ Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (inclu Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config -None does not seem to correspond to a valid lhapdf-config executable. 
-Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). -Note that you can still compile and run aMC@NLO with the built-in PDFs - MG5_aMC> set lhapdf /PATH/TO/lhapdf-config - Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -61,7 +56,7 @@ set zerowidth_tchannel F import model sm-no_b_mass INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005822896957397461  +DEBUG: model prefixing takes 0.005758047103881836  INFO: Restrict model sm-no_b_mass with file models/sm/restrict_no_b_mass.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -185,7 +180,7 @@ INFO: Process u~ d > t t~ w- added to mirror process d u~ > t t~ w- INFO: Process c~ s > t t~ w- added to mirror process s c~ > t t~ w- INFO: Process d~ u > t t~ w+ added to mirror process u d~ > t t~ w+ INFO: Process s~ c > t t~ w+ added to mirror process c s~ > t t~ w+ -4 processes with 8 diagrams generated in 0.108 s +4 processes with 8 diagrams generated in 0.111 s Total: 4 processes with 8 diagrams add process p p > t t~ w j @1 INFO: Checking for minimal orders which gives processes. @@ -227,24 +222,24 @@ INFO: Process d~ g > t t~ w+ u~ added to mirror process g d~ > t t~ w+ u~ INFO: Process d~ u > t t~ w+ g added to mirror process u d~ > t t~ w+ g INFO: Process s~ g > t t~ w+ c~ added to mirror process g s~ > t t~ w+ c~ INFO: Process s~ c > t t~ w+ g added to mirror process c s~ > t t~ w+ g -12 processes with 144 diagrams generated in 0.649 s +12 processes with 144 diagrams generated in 0.661 s Total: 16 processes with 152 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_nobm_pp_ttW --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT -Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.5.3_lo_vect. +Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT +DEBUG: opt['output_options']['vector_size'] =  32 [export_v4.py at line 4334]  Output will be done with PLUGIN: CUDACPP_OUTPUT -DEBUG: cformat =  standalone_simd [export_cpp.py at line 3070]  DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 165]  INFO: initialize a new directory: CODEGEN_mad_nobm_pp_ttW INFO: remove old information in CODEGEN_mad_nobm_pp_ttW DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g u > t t~ w+ d WEIGHTED<=5 @1 INFO: Processing color information for process: g u > t t~ w+ d @1 @@ -276,13 +271,8 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] [export_cpp.py at line 711]  -DEBUG: subproc_number =  0 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g u > t t~ w+ d WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxwpd -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P1_gu_ttxwpd [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  12 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1548]  @@ -292,13 +282,8 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
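Stepping back to the Python changes earlier in this diff: do_combine_events now accumulates per-channel results and unweights in batches of 80 files, and do_combine_events_partial returns the combined cross section with its error. A minimal sketch of that accumulation arithmetic, in plain Python with hypothetical result dictionaries rather than the MG5 sum_html objects:

import math

def combine_channels(results, cross, nevents_requested):
    """Accumulate per-channel results as do_combine_events_partial does:
    xsec and axsec add linearly, absolute errors add in quadrature."""
    sum_xsec, sum_xerru, sum_axsec = 0.0, [], 0.0
    for r in results:  # r: {'xsec': ..., 'xerru': ..., 'axsec': ...}
        sum_xsec += r['xsec']
        sum_xerru.append(r['xerru'])
        sum_axsec += r['axsec']
    err = math.sqrt(sum(x**2 for x in sum_xerru))
    # event target: 1% overshoot, clamped to the request, floor of 10
    nb_event = max(min(abs(1.01 * nevents_requested * sum_axsec / cross),
                       nevents_requested), 10)
    return sum_xsec, err, sum_axsec, int(nb_event)

# two channels of 1.0 +- 0.1 pb combine to 2.0 +- 0.14 pb
print(combine_channels([{'xsec': 1.0, 'xerru': 0.1, 'axsec': 1.2}] * 2,
                       cross=2.4, nevents_requested=10000))

The 1.01 factor asks each partial unweighting for a one percent overshoot so that the final combination can still trim down to the requested event count.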
-DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] [export_cpp.py at line 711]  -DEBUG: subproc_number =  1 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g d > t t~ w- u WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gd_ttxwmu -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P1_gd_ttxwmu [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  12 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1548]  @@ -308,13 +293,8 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] [export_cpp.py at line 711]  -DEBUG: subproc_number =  2 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ w- d~ WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxwmdx -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P1_gux_ttxwmdx [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  12 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1548]  @@ -324,13 +304,8 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] [export_cpp.py at line 711]  -DEBUG: subproc_number =  3 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g d~ > t t~ w+ u~ WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group gdx_ttxwpux -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P1_gdx_ttxwpux [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  12 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1548]  @@ -340,13 +315,8 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] [export_cpp.py at line 711]  -DEBUG: subproc_number =  4 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: u d~ > t t~ w+ g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group udx_ttxwpg -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P1_udx_ttxwpg [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  12 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1548]  @@ -356,13 +326,8 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12] [export_cpp.py at line 711]  -DEBUG: subproc_number =  5 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: d u~ > t t~ w- g WEIGHTED<=5 @1 INFO: Finding symmetric diagrams for subprocess group dux_ttxwmg -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P1_dux_ttxwmg [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  12 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12} [model_handling.py at line 1548]  @@ -372,13 +337,8 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2] [export_cpp.py at line 711]  -DEBUG: subproc_number =  6 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: u d~ > t t~ w+ WEIGHTED<=4 INFO: Finding symmetric diagrams for subprocess group udx_ttxwp -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P0_udx_ttxwp [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  2 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2} [model_handling.py at line 1548]  @@ -388,18 +348,14 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2] [export_cpp.py at line 711]  -DEBUG: subproc_number =  7 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: d u~ > t t~ w- WEIGHTED<=4 INFO: Finding symmetric diagrams for subprocess group dux_ttxwm -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P0_dux_ttxwm [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  2 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2} [model_handling.py at line 1548]  -Generated helas calls for 8 subprocesses (76 diagrams) in 0.203 s -Wrote files for 212 helas calls in 0.911 s +Generated helas calls for 8 subprocesses (76 diagrams) in 0.206 s +Wrote files for 212 helas calls in 0.836 s +DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines @@ -409,7 +365,7 @@ ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates FFV1 routines ALOHA: aloha creates FFV2 routines ALOHA: aloha creates VVV1 set of routines with options: P0 -ALOHA: aloha creates 6 routines in 0.201 s +ALOHA: aloha creates 6 routines in 0.206 s FFV1 FFV1 FFV1 @@ -417,74 +373,97 @@ ALOHA: aloha creates 6 routines in 0.201 s FFV2 FFV2 VVV1 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/./HelAmps_sm_no_b_mass.h -INFO: Created file HelAmps_sm_no_b_mass.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/./HelAmps_sm_no_b_mass.h +INFO: Created file HelAmps_sm_no_b_mass.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/./Parameters_sm_no_b_mass.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/./Parameters_sm_no_b_mass.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/./Parameters_sm_no_b_mass.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/./Parameters_sm_no_b_mass.cc INFO: Created files Parameters_sm_no_b_mass.h and Parameters_sm_no_b_mass.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. 
If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common -patching file Source/genps.inc +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file SubProcesses/makefile -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P0_dux_ttxwm; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P0_dux_ttxwm; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -Hunk #1 succeeded at 73 (offset 1 line). -Hunk #2 succeeded at 273 (offset 40 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P0_udx_ttxwp; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #1 succeeded at 72 (offset 1 line). +Hunk #2 succeeded at 268 (offset 54 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P0_udx_ttxwp; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -Hunk #1 succeeded at 73 (offset 1 line). -Hunk #2 succeeded at 273 (offset 40 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P1_dux_ttxwmg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #1 succeeded at 72 (offset 1 line). +Hunk #2 succeeded at 268 (offset 54 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P1_dux_ttxwmg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -Hunk #1 succeeded at 73 (offset 1 line). -Hunk #2 succeeded at 321 (offset 88 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P1_gd_ttxwmu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #1 succeeded at 72 (offset 1 line). +Hunk #2 succeeded at 316 (offset 102 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P1_gd_ttxwmu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -Hunk #1 succeeded at 73 (offset 1 line). -Hunk #2 succeeded at 321 (offset 88 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P1_gdx_ttxwpux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #1 succeeded at 72 (offset 1 line). +Hunk #2 succeeded at 316 (offset 102 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P1_gdx_ttxwpux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -Hunk #1 succeeded at 73 (offset 1 line). -Hunk #2 succeeded at 321 (offset 88 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P1_gu_ttxwpd; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #1 succeeded at 72 (offset 1 line). +Hunk #2 succeeded at 316 (offset 102 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P1_gu_ttxwpd; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -Hunk #1 succeeded at 73 (offset 1 line). -Hunk #2 succeeded at 321 (offset 88 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P1_gux_ttxwmdx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #1 succeeded at 72 (offset 1 line). +Hunk #2 succeeded at 316 (offset 102 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P1_gux_ttxwmdx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -Hunk #1 succeeded at 73 (offset 1 line). -Hunk #2 succeeded at 321 (offset 88 lines). 
-DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P1_udx_ttxwpg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #1 succeeded at 72 (offset 1 line). +Hunk #2 succeeded at 316 (offset 102 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/SubProcesses/P1_udx_ttxwpg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -Hunk #1 succeeded at 73 (offset 1 line). -Hunk #2 succeeded at 321 (offset 88 lines). +Hunk #1 succeeded at 72 (offset 1 line). +Hunk #2 succeeded at 316 (offset 102 lines). DEBUG: p.returncode =  0 [output.py at line 258]  -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/README +/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/README Run "open index.html" to see more information about this process. quit -real 0m4.821s -user 0m4.254s -sys 0m0.492s +real 0m4.712s +user 0m4.184s +sys 0m0.513s Code generation completed in 5 seconds ************************************************************ * * @@ -498,7 +477,7 @@ Code generation completed in 5 seconds * * * * * * * * * * * * -* VERSION 3.5.3_lo_vect * +* VERSION 3.6.0_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * @@ -506,9 +485,9 @@ Code generation completed in 5 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt @@ -528,7 +507,7 @@ launch in debug mode * * * * * * * * * * * * -* VERSION 3.5.3_lo_vect * +* VERSION 3.6.0_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * @@ -536,9 +515,9 @@ launch in debug mode * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_nobm_pp_ttW/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Cards/me5_configuration.txt b/epochX/cudacpp/nobm_pp_ttW.mad/Cards/me5_configuration.txt index cdeedc7863..4f5079f78a 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/nobm_pp_ttW.mad/Cards/me5_configuration.txt @@ -116,6 +116,7 @@ # cluster_type = condor # cluster_queue = madgraph # cluster_size = 150 +# cluster_walltime = # time in minutes for slurm and seconds for condor (not supported for other schedulers) #! Path to a node directory to avoid direct writing on the central disk #! Note that condor clusters avoid direct writing by default (therefore this @@ -234,7 +235,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Cards/param_card.dat b/epochX/cudacpp/nobm_pp_ttW.mad/Cards/param_card.dat index 943e6ff7f2..e07ce1c87a 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/Cards/param_card.dat +++ b/epochX/cudacpp/nobm_pp_ttW.mad/Cards/param_card.dat @@ -1,5 +1,5 @@ ###################################################################### -## PARAM_CARD AUTOMATICALY GENERATED BY MG5 FOLLOWING UFO MODEL #### +## PARAM_CARD AUTOMATICALLY GENERATED BY MG5 FOLLOWING UFO MODEL #### ###################################################################### ## ## ## Width set on Auto will be computed following the information ## diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Cards/param_card_default.dat b/epochX/cudacpp/nobm_pp_ttW.mad/Cards/param_card_default.dat index 943e6ff7f2..e07ce1c87a 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/Cards/param_card_default.dat +++ b/epochX/cudacpp/nobm_pp_ttW.mad/Cards/param_card_default.dat @@ -1,5 +1,5 @@ ###################################################################### -## PARAM_CARD AUTOMATICALY GENERATED BY MG5 FOLLOWING UFO MODEL #### +## PARAM_CARD AUTOMATICALLY GENERATED BY MG5 FOLLOWING UFO MODEL #### ###################################################################### ## ## ## Width set on Auto will be computed following the information ## diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/nobm_pp_ttW.mad/Cards/proc_card_mg5.dat index 9dd46e75a8..49ae48919c 100644 ---
a/epochX/cudacpp/nobm_pp_ttW.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/nobm_pp_ttW.mad/Cards/proc_card_mg5.dat @@ -8,7 +8,7 @@ #* * * * #* * #* * -#* VERSION 3.5.3_lo_vect 2023-12-23 * +#* VERSION 3.6.0_lo_vect 2024-06-17 * #* * #* WARNING: UNKNOWN DEVELOPMENT VERSION. * #* WARNING: DO NOT USE FOR PRODUCTION * diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Cards/reweight_card_default.dat b/epochX/cudacpp/nobm_pp_ttW.mad/Cards/reweight_card_default.dat index ace534ae02..5cc65fe095 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/Cards/reweight_card_default.dat +++ b/epochX/cudacpp/nobm_pp_ttW.mad/Cards/reweight_card_default.dat @@ -19,8 +19,16 @@ change mode NLO # Define type of Reweighting. For LO sample this command # has no effect since only "LO" mode is allowed. + +#uncomment if you do not want to overwrite the Sudakov reweight file in rw_me +#change rwgt_dir /PATH_MG5_BRANCH/NAME_FOLDER + +#uncomment if you want to use Sudakov reweighting +#change include_sudakov True + launch + # SPECIFY A PATH OR USE THE SET COMMAND LIKE THIS: # set sminputs 1 130 # modify 1/alpha_EW diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Cards/run_card.dat b/epochX/cudacpp/nobm_pp_ttW.mad/Cards/run_card.dat index b80a436c7c..5eca3e3f2b 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/Cards/run_card.dat +++ b/epochX/cudacpp/nobm_pp_ttW.mad/Cards/run_card.dat @@ -180,6 +180,7 @@ systematics = systematics_program ! none, systematics [python], SysCalc [deprecated, C++] ['--mur=0.5,1,2', '--muf=0.5,1,2', '--pdf=errorset', '--alps=0.5,1,2'] = systematics_arguments ! see: https://cp3.irmp.ucl.ac.be/projects/madgraph/wiki/Systematics#Systematicspythonmodule + #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Cards/run_card_default.dat b/epochX/cudacpp/nobm_pp_ttW.mad/Cards/run_card_default.dat index 811e9c6ab2..3b445d02a0 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/Cards/run_card_default.dat +++ b/epochX/cudacpp/nobm_pp_ttW.mad/Cards/run_card_default.dat @@ -180,6 +180,7 @@ systematics = systematics_program ! none, systematics [python], SysCalc [deprecated, C++] ['--mur=0.5,1,2', '--muf=0.5,1,2', '--pdf=errorset', '--alps=0.5,1,2'] = systematics_arguments !
see: https://cp3.irmp.ucl.ac.be/projects/madgraph/wiki/Systematics#Systematicspythonmodule + #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/MGMEVersion.txt b/epochX/cudacpp/nobm_pp_ttW.mad/MGMEVersion.txt index 9d3a5c0ba0..a806d3938c 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/MGMEVersion.txt +++ b/epochX/cudacpp/nobm_pp_ttW.mad/MGMEVersion.txt @@ -1 +1 @@ -3.5.3_lo_vect \ No newline at end of file +3.6.0_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Source/DHELAS/aloha_functions.f b/epochX/cudacpp/nobm_pp_ttW.mad/Source/DHELAS/aloha_functions.f index 975725737f..47699fa614 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/Source/DHELAS/aloha_functions.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/Source/DHELAS/aloha_functions.f @@ -351,7 +351,7 @@ subroutine oxxxxx(p,fmass,nhel,nsf , fo) fo(6) = ip * sqm(abs(im)) else - pp = min(p(0),dsqrt(p(1)**2+p(2)**2+p(3)**2)) +c pp = min(p(0),dsqrt(p(1)**2+p(2)**2+p(3)**2)) sf(1) = dble(1+nsf+(1-nsf)*nh)*rHalf sf(2) = dble(1+nsf-(1-nsf)*nh)*rHalf omega(1) = dsqrt(p(0)+pp) @@ -2054,7 +2054,7 @@ subroutine CombineAmp(nb, ihels, iwfcts, W1, Wall, Amp) enddo return end - + subroutine CombineAmpS(nb, ihels, iwfcts, W1, Wall, Amp) integer nb ! size of the vectors diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Source/MODEL/couplings.f b/epochX/cudacpp/nobm_pp_ttW.mad/Source/MODEL/couplings.f index f3b620ab58..04d6bb5333 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/Source/MODEL/couplings.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/Source/MODEL/couplings.f @@ -10,18 +10,27 @@ SUBROUTINE COUP() PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + LOGICAL UPDATELOOP COMMON /TO_UPDATELOOP/UPDATELOOP INCLUDE 'input.inc' - INCLUDE '../vector.inc' + + INCLUDE 'coupl.inc' READLHA = .TRUE. 
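c (Sketch of the control flow patched into COUP here, inferred from this
c diff: COUP1 fills the aS-independent couplings once; COUP2, now
c argument-less and called only when UPDATELOOP is set, refreshes the
c couplings needed for loop updates; COUP3(VECID) evaluates the
c aS-dependent couplings, such as GC_10 and GC_11 removed from
c couplings2.f and rebuilt in the new couplings3.f listed in makeinc.inc,
c once per vector slot. UPDATE_AS_PARAM accordingly calls COUP3(VECID)
c after refreshing G.)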
INCLUDE 'intparam_definition.inc' CALL COUP1() + IF (UPDATELOOP) THEN + + CALL COUP2() + + ENDIF + C couplings needed to be evaluated points by points C - CALL COUP2(1) + CALL COUP3(1) RETURN END @@ -47,7 +56,11 @@ SUBROUTINE UPDATE_AS_PARAM(VECID) INCLUDE '../maxparticles.inc' INCLUDE '../cuts.inc' + + INCLUDE '../vector.inc' + + INCLUDE '../run.inc' DOUBLE PRECISION ALPHAS @@ -65,7 +78,7 @@ SUBROUTINE UPDATE_AS_PARAM(VECID) couplings needed to be evaluated points by points C ALL_G(VECID) = G - CALL COUP2(VECID) + CALL COUP3(VECID) RETURN END @@ -80,7 +93,9 @@ SUBROUTINE UPDATE_AS_PARAM2(MU_R2,AS2 ,VECID) INTEGER VECID INCLUDE 'model_functions.inc' INCLUDE 'input.inc' + INCLUDE '../vector.inc' + INCLUDE 'coupl.inc' DOUBLE PRECISION MODEL_SCALE COMMON /MODEL_SCALE/MODEL_SCALE diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Source/MODEL/couplings1.f b/epochX/cudacpp/nobm_pp_ttW.mad/Source/MODEL/couplings1.f index 04b640a980..eecf80afa7 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/Source/MODEL/couplings1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/Source/MODEL/couplings1.f @@ -7,12 +7,13 @@ SUBROUTINE COUP1( ) IMPLICIT NONE INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' GC_100 = (MDL_EE*MDL_COMPLEXI*MDL_CONJG__CKM1X1)/(MDL_SW $ *MDL_SQRT__2) diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Source/MODEL/couplings2.f b/epochX/cudacpp/nobm_pp_ttW.mad/Source/MODEL/couplings2.f index be9e9f5a39..30f3a04e3b 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/Source/MODEL/couplings2.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/Source/MODEL/couplings2.f @@ -2,18 +2,17 @@ c written by the UFO converter ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc - SUBROUTINE COUP2( VECID) + SUBROUTINE COUP2( ) IMPLICIT NONE - INTEGER VECID + INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' - GC_10(VECID) = -G - GC_11(VECID) = MDL_COMPLEXI*G END diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Source/MODEL/makefile b/epochX/cudacpp/nobm_pp_ttW.mad/Source/MODEL/makefile index 0b24445a53..5a5681d220 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/Source/MODEL/makefile +++ b/epochX/cudacpp/nobm_pp_ttW.mad/Source/MODEL/makefile @@ -3,7 +3,7 @@ # Makefile for model library # # ---------------------------------------------------------------------------- - +# template models/template_files/makefile_madevent # Check for ../make_opts ifeq ($(wildcard ../make_opts), ../make_opts) include ../make_opts diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Source/MODEL/makeinc.inc b/epochX/cudacpp/nobm_pp_ttW.mad/Source/MODEL/makeinc.inc index 6e2743eac1..4a9e1b2cc5 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/Source/MODEL/makeinc.inc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/Source/MODEL/makeinc.inc @@ -2,4 +2,4 @@ # written by the UFO converter ############################################################################# -MODEL = couplings.o lha_read.o printout.o rw_para.o model_functions.o couplings1.o couplings2.o \ No newline at end of file +MODEL = couplings.o lha_read.o printout.o rw_para.o model_functions.o couplings1.o couplings2.o couplings3.o \ No newline at end of file diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Source/MODEL/printout.f b/epochX/cudacpp/nobm_pp_ttW.mad/Source/MODEL/printout.f index 18b8f35b08..3e195b03a2 100644 
--- a/epochX/cudacpp/nobm_pp_ttW.mad/Source/MODEL/printout.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/Source/MODEL/printout.f @@ -1,3 +1,7 @@ +ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc +c written by the UFO converter +ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc + c************************************************************************ c** ** c** MadGraph/MadEvent Interface to FeynRules ** @@ -9,7 +13,7 @@ subroutine printout implicit none - include '../vector.inc' ! defines VECSIZE_MEMMAX + include '../vector.inc' ! VECSIZE_MEMMAX (needed by coupl.inc) include 'coupl.inc' ! needs VECSIZE_MEMMAX (defined in vector.inc) include 'input.inc' diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Source/PDF/eepdf.inc b/epochX/cudacpp/nobm_pp_ttW.mad/Source/PDF/eepdf.inc index a0183e49ee..d50d8c62b3 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/Source/PDF/eepdf.inc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/Source/PDF/eepdf.inc @@ -4,6 +4,9 @@ integer n_ee parameter (n_ee = 4) ! arrays to store the components before combining them + ! note this common is very quickly overwritten do not use + ! use such common outside of auto_dsig.f double precision ee_components(n_ee) common / to_ee_components / ee_components + diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Source/PDF/pdfwrap_emela.f b/epochX/cudacpp/nobm_pp_ttW.mad/Source/PDF/pdfwrap_emela.f index bce10819d5..da1e4e2276 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/Source/PDF/pdfwrap_emela.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/Source/PDF/pdfwrap_emela.f @@ -13,7 +13,7 @@ SUBROUTINE PDFWRAP DOUBLE PRECISION VALUE(20) REAL*8 ALPHASPDF EXTERNAL ALPHASPDF - ! PDFs with beamstrahlung use specific initialisation/evaluation +C PDFs with beamstrahlung use specific initialisation/evaluation LOGICAL HAS_BSTRAHL COMMON /TO_HAS_BS/ HAS_BSTRAHL diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Source/PDF/pdg2pdf.f b/epochX/cudacpp/nobm_pp_ttW.mad/Source/PDF/pdg2pdf.f index 46f321e66b..2e7343a69b 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/Source/PDF/pdg2pdf.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/Source/PDF/pdg2pdf.f @@ -108,7 +108,7 @@ double precision function pdg2pdf(ih,ipdg,beamid,x,xmu) else if (abs(ipart).eq.10) then ipart = sign(1,ipart) * 15 endif - pdg2pdf = 0d0 + pdg2pdf = 0d0 if (beamid.lt.0) then ih_local = ipart @@ -122,7 +122,7 @@ double precision function pdg2pdf(ih,ipdg,beamid,x,xmu) endif do i_ee = 1, n_ee ee_components(i_ee) = compute_eepdf(x,omx_ee(iabs(beamid)),xmu,i_ee,ipart,ih_local) - enddo + enddo pdg2pdf = ee_components(1) ! temporary to test pdf load c write(*,*), x, beamid ,omx_ee(iabs(beamid)),xmu,1,ipart,ih_local,pdg2pdf return diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Source/dsample.f b/epochX/cudacpp/nobm_pp_ttW.mad/Source/dsample.f index 09d482b45a..b0bc0547ad 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/Source/dsample.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/Source/dsample.f @@ -204,6 +204,9 @@ subroutine sample_full(ndim,ncall,itmax,itmin,dsig,ninvar,nconfigs,VECSIZE_USED) if (VECSIZE_USED.le.1) then all_fx(1) = dsig(all_p, all_wgt,0) + ivec=0 + ilock=0 + iwarp=1 else c Here "i" is the position in the full grid of the event do i=(iwarp-1)*WARP_SIZE+1, iwarp*warp_size diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Source/eepdf.inc b/epochX/cudacpp/nobm_pp_ttW.mad/Source/eepdf.inc index a0183e49ee..d50d8c62b3 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/Source/eepdf.inc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/Source/eepdf.inc @@ -4,6 +4,9 @@ integer n_ee parameter (n_ee = 4) ! 
arrays to store the components before combining them + ! note this common is very quickly overwritten do not use + ! use such common outside of auto_dsig.f double precision ee_components(n_ee) common / to_ee_components / ee_components + diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Source/genps.inc b/epochX/cudacpp/nobm_pp_ttW.mad/Source/genps.inc index af7e0efbce..ef78e7e812 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/Source/genps.inc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/Source/genps.inc @@ -29,9 +29,8 @@ c parameter (max_host=99,maxplace=199,maxpoints=100,maxans=50) c************************************************************************* c Parameters for helicity sums in matrixN.f c************************************************************************* - REAL*8 LIMHEL -c PARAMETER(LIMHEL=1e-8) ! ME threshold for helicity filtering (Fortran default) - PARAMETER(LIMHEL=0) ! ME threshold for helicity filtering (force Fortran to mimic cudacpp, see #419) +c REAL*8 LIMHEL +c PARAMETER(LIMHEL=1e-8) -> pass in the run_card.dat INTEGER MAXTRIES PARAMETER(MAXTRIES=25) C To pass the helicity configuration chosen by the DiscreteSampler to diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Source/run.inc b/epochX/cudacpp/nobm_pp_ttW.mad/Source/run.inc index 5433a23583..81c8df6bf2 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/Source/run.inc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/Source/run.inc @@ -106,4 +106,7 @@ c double precision tmin_for_channel integer sde_strat ! 1 means standard single diagram enhancement strategy, c 2 means approximation by the denominator of the propagator - common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat \ No newline at end of file + common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat +c + double precision limhel + common/to_limhel/limhel diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Source/run_card.inc b/epochX/cudacpp/nobm_pp_ttW.mad/Source/run_card.inc index 22d8b7aaa9..2588190439 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/Source/run_card.inc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/Source/run_card.inc @@ -88,6 +88,8 @@ DSQRT_SHAT = 0.000000000000000D+00 + LIMHEL = 0.000000000000000D+00 + PTJ = 2.000000000000000D+01 PTB = 0.000000000000000D+00 diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/Source/setrun.f b/epochX/cudacpp/nobm_pp_ttW.mad/Source/setrun.f index 9e9ef7fdbd..dc6caef748 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/Source/setrun.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/Source/setrun.f @@ -162,9 +162,21 @@ subroutine setrun C Fill common block for Les Houches init info do i=1,2 if(lpp(i).eq.1.or.lpp(i).eq.2) then - idbmup(i)=2212 + if (nb_proton(i).eq.1.and.nb_neutron(i).eq.0) then + idbmup(i)=2212 + elseif (nb_proton(i).eq.0.and.nb_neutron(i).eq.1) then + idbmup(i)=2112 + else + idbmup(i) = 1000000000 + (nb_proton(i)+nb_neutron(i))*10 + $ + nb_proton(i)*10000 + endif elseif(lpp(i).eq.-1.or.lpp(i).eq.-2) then - idbmup(i)=-2212 + if (nb_proton(i).eq.1.and.nb_neutron(i).eq.0) then + idbmup(i)=-2212 + else + idbmup(i) = -1*(1000000000 + (nb_proton(i)+nb_neutron(i))*10 + $ + nb_proton(i)*10000) + endif elseif(lpp(i).eq.3) then idbmup(i)=11 elseif(lpp(i).eq.-3) then diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/MGVersion.txt b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/MGVersion.txt index 9d3a5c0ba0..a806d3938c 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/MGVersion.txt +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/MGVersion.txt @@ -1 +1 @@ -3.5.3_lo_vect \ No newline at end of file +3.6.0_lo_vect \ No newline at end of file diff --git 
a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/CPPProcess.cc index e38b43e7f3..14e10e47fc 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/CPPProcess.cc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/CPPProcess.h b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/CPPProcess.h index 79e7b1b6fc..20e49fe614 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/CPPProcess.h +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/auto_dsig.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/auto_dsig.f index 33b5b1c440..5891689dfa 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/auto_dsig.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1135,11 +1135,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=48) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1149,32 +1150,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=48) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=48) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/auto_dsig1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/auto_dsig1.f index a13eea8546..270de18021 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/auto_dsig1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -102,6 +102,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -223,7 +225,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -288,7 +290,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! 
random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -327,9 +329,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -345,6 +348,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -502,11 +507,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -606,9 +606,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -753,3 +755,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/driver.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/driver.f index 57ea01dbd8..531dfa0771 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/driver.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
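c ----------------------------------------------------------------------
c NOTE (annotation, not part of the generated patch): while init_mode
c is active, i.e. during the 'Determining zero helicities' pass below,
c the new code forces fixed renormalisation/factorisation scales here
c and in the main program (where it also sets ickkw = 0), presumably so
c that the helicity scan does not depend on event-by-event dynamic
c scales. The guard added in DRIVER reads:
c
c       if (init_mode) then
c         fixed_ren_scale = .true.
c         fixed_fac_scale1 = .true.
c         fixed_fac_scale2 = .true.
c         ickkw = 0
c       endif
c ----------------------------------------------------------------------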
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/matrix1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/matrix1.f index a5685fa4f7..d62b9b396d 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/matrix1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_dux_ttxwm/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -24,6 +24,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=48) INTEGER NGRAPHS @@ -47,8 +49,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -57,26 +59,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -87,7 +86,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -97,26 +95,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
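c ----------------------------------------------------------------------
c NOTE (annotation, not part of the generated patch): LIMHEL, the
c threshold used just above (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) to flag
c good helicities, is no longer a compile-time PARAMETER in genps.inc:
c it is now a double precision variable in common /to_limhel/ declared
c in run.inc and filled from the run card (run_card.inc sets
c LIMHEL = 0D0, i.e. the default keeps mimicking the cudacpp filtering
c of issue #419).
c ----------------------------------------------------------------------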
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,5) / 1,-1,-1, 1, 1/ DATA (NHEL(I, 2),I=1,5) / 1,-1,-1, 1, 0/ DATA (NHEL(I, 3),I=1,5) / 1,-1,-1, 1,-1/ @@ -176,8 +173,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -186,11 +182,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=2 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=2 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -200,16 +196,16 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) - ! handling only one beam polarization here. Second beam can be handle via the pdf. +C handling only one beam polarization here. Second beam can +C be handle via the pdf. IF(POL(2).NE.1D0.AND.NHEL(2,I).EQ.INT(SIGN(1D0,POL(2)))) $ THEN T=T*ABS(POL(2)) @@ -224,7 +220,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -253,35 +250,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -294,7 +289,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) - ! handling only one beam polarization here. Second beam can be handle via the pdf. +C handling only one beam polarization here. Second beam can be +C handle via the pdf. IF(POL(2).NE.1D0.AND.NHEL(2,I).EQ.INT(SIGN(1D0,POL(2)))) THEN T=T*ABS(POL(2)) ELSE IF(POL(2).NE.1D0)THEN @@ -359,7 +355,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/CPPProcess.cc index 0541cd1591..2dd6b7e9c8 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/CPPProcess.cc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/CPPProcess.h b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/CPPProcess.h index 3296b2d643..341673e627 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/CPPProcess.h +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/auto_dsig.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/auto_dsig.f index 743426de24..4c2829fa46 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/auto_dsig.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1135,11 +1135,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=48) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1149,32 +1150,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=48) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=48) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/auto_dsig1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/auto_dsig1.f index 86d5f78001..83f18c6ff4 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/auto_dsig1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -102,6 +102,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -223,7 +225,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -288,7 +290,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -327,9 +329,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -345,6 +348,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -502,11 +507,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -606,9 +606,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -753,3 +755,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/driver.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/driver.f index 57ea01dbd8..531dfa0771 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/driver.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
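c ----------------------------------------------------------------------
c NOTE (annotation, not part of the generated patch): earlier in this
c subprocess's auto_dsig1.f, the PR #486 workaround that called
c RESET_CUMULATIVE_VARIABLE when FBRIDGE_MODE.EQ.1 is commented out;
c matrix1.f instead guards its own reset with NTRY(1).EQ.(MAXTRIES+1)
c .AND. DS_GET_DIM_STATUS('Helicity').NE.-1, apparently consolidating
c the bias-avoidance reset in a single place.
c ----------------------------------------------------------------------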
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/matrix1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/matrix1.f index d3f67221a5..cea81c4d0d 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/matrix1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P0_udx_ttxwp/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -24,6 +24,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=48) INTEGER NGRAPHS @@ -47,8 +49,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -57,26 +59,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -87,7 +86,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -97,26 +95,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,5) / 1,-1,-1, 1,-1/ DATA (NHEL(I, 2),I=1,5) / 1,-1,-1, 1, 0/ DATA (NHEL(I, 3),I=1,5) / 1,-1,-1, 1, 1/ @@ -176,8 +173,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -186,11 +182,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=2 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=2 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -200,16 +196,16 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) - ! handling only one beam polarization here. Second beam can be handle via the pdf. +C handling only one beam polarization here. Second beam can +C be handle via the pdf. IF(POL(2).NE.1D0.AND.NHEL(2,I).EQ.INT(SIGN(1D0,POL(2)))) $ THEN T=T*ABS(POL(2)) @@ -224,7 +220,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -253,35 +250,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
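c ----------------------------------------------------------------------
c NOTE (annotation, not part of the generated patch): as in the dux
c subprocess above, this matrix1.f hunk drops all per-mirror helicity
c bookkeeping: NTRY, GOODHEL, ISHEL, NGOOD and NGOODHEL lose their
c IMIRROR index (only slot 1 is used, with GOODHEL/NTRY dimensioned by
c MAXSPROC from maxamps.inc), and the THIS_NTRY, IGOOD and JHEL work
c arrays disappear entirely.
c ----------------------------------------------------------------------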
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -294,7 +289,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) - ! handling only one beam polarization here. Second beam can be handle via the pdf. +C handling only one beam polarization here. Second beam can be +C handle via the pdf. IF(POL(2).NE.1D0.AND.NHEL(2,I).EQ.INT(SIGN(1D0,POL(2)))) THEN T=T*ABS(POL(2)) ELSE IF(POL(2).NE.1D0)THEN @@ -359,7 +355,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/CPPProcess.cc index fc9b6764e6..2a7368ed83 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/CPPProcess.cc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/CPPProcess.h b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/CPPProcess.h index 073939ee63..bfd92ecff3 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/CPPProcess.h +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/auto_dsig.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/auto_dsig.f index 5826d24723..396df04429 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/auto_dsig.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1135,11 +1135,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=96) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1149,32 +1150,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=96) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=96) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/auto_dsig1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/auto_dsig1.f index 7902f12b15..bb5e721ed6 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/auto_dsig1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -102,6 +102,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -223,7 +225,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -288,7 +290,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -327,9 +329,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -345,6 +348,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -502,11 +507,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -606,9 +606,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -801,3 +803,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/driver.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/driver.f index 9ff47ec544..d8518f17f7 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/driver.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
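c ----------------------------------------------------------------------
c NOTE (annotation, not part of the generated patch): two changes
c recur in every auto_dsig1.f of this patch, as in the hunks above:
c (i) SELECTED_HEL(:) = 0 and SELECTED_COL(:) = 0 now reset the
c per-event helicity/colour selection at the top of DSIG1 and
c DSIG1_VEC, and (ii) the hand-written CHARACTER*7 PDLABEL /
c COMMON/TO_PDF/ block is replaced by INCLUDE
c '../../Source/PDF/pdf.inc', so the PDF common block is declared in
c one place.
c ----------------------------------------------------------------------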
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/matrix1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/matrix1.f index 15384201fd..122483e42d 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/matrix1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_dux_ttxwmg/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -24,6 +24,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=96) INTEGER NGRAPHS @@ -47,8 +49,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -57,26 +59,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -87,7 +86,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -97,26 +95,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,6) / 1,-1,-1, 1, 1,-1/ DATA (NHEL(I, 2),I=1,6) / 1,-1,-1, 1, 1, 1/ DATA (NHEL(I, 3),I=1,6) / 1,-1,-1, 1, 0,-1/ @@ -224,8 +221,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -234,11 +230,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=4 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=4 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -248,16 +244,16 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) - ! handling only one beam polarization here. Second beam can be handle via the pdf. +C handling only one beam polarization here. Second beam can +C be handle via the pdf. IF(POL(2).NE.1D0.AND.NHEL(2,I).EQ.INT(SIGN(1D0,POL(2)))) $ THEN T=T*ABS(POL(2)) @@ -272,7 +268,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -301,35 +298,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
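c ----------------------------------------------------------------------
c NOTE (annotation, not part of the generated patch): a subtle fix
c repeated in each matrix1.f, visible a few hunks above: the JAMP2
c reset is hoisted out of the IF(MULTI_CHANNEL) block, so the
c colour-flow accumulators are now zeroed on every call rather than
c only when multi-channelling is enabled:
c
c       IF (MULTI_CHANNEL) THEN
c         DO I=1,NDIAGS
c           AMP2(I)=0D0
c         ENDDO
c       ENDIF
c       JAMP2(0)=4             ! always executed now
c       DO I=1,INT(JAMP2(0))
c         JAMP2(I)=0D0
c       ENDDO
c ----------------------------------------------------------------------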
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -342,7 +337,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) - ! handling only one beam polarization here. Second beam can be handle via the pdf. +C handling only one beam polarization here. Second beam can be +C handle via the pdf. IF(POL(2).NE.1D0.AND.NHEL(2,I).EQ.INT(SIGN(1D0,POL(2)))) THEN T=T*ABS(POL(2)) ELSE IF(POL(2).NE.1D0)THEN @@ -407,7 +403,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/CPPProcess.cc index 639e6e3b52..fbcc691644 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/CPPProcess.cc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/CPPProcess.h b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/CPPProcess.h index 63b5815ea8..6d0f6f6b2e 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/CPPProcess.h +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/auto_dsig.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/auto_dsig.f index c3d0230022..5ffdcf9830 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/auto_dsig.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1135,11 +1135,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=96) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1149,32 +1150,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=96) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=96) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/auto_dsig1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/auto_dsig1.f index eea411d970..30a102ffc6 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/auto_dsig1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -102,6 +102,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -222,7 +224,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -287,7 +289,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -326,9 +328,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -344,6 +347,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -499,11 +504,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -603,9 +603,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -798,3 +800,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/driver.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/driver.f index 9ff47ec544..d8518f17f7 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/driver.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
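C Note: under init_mode (the zero-helicity survey) the driver pins
C the renormalisation and factorisation scales, and DRIVER itself
C additionally sets ickkw = 0, so the survey is deterministic and
C independent of the event-by-event dynamical-scale machinery; the
C flags are presumably honoured through the run.inc common blocks
C included just above.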
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/matrix1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/matrix1.f index 3884a841e5..b9fc08afab 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/matrix1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gd_ttxwmu/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -24,6 +24,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=96) INTEGER NGRAPHS @@ -47,8 +49,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -57,26 +59,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -87,7 +86,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -97,26 +95,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,6) /-1, 1,-1, 1, 1,-1/ DATA (NHEL(I, 2),I=1,6) /-1, 1,-1, 1, 1, 1/ DATA (NHEL(I, 3),I=1,6) /-1, 1,-1, 1, 0,-1/ @@ -224,8 +221,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -234,11 +230,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=4 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=4 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -248,16 +244,16 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) - ! handling only one beam polarization here. Second beam can be handle via the pdf. +C handling only one beam polarization here. Second beam can +C be handle via the pdf. IF(POL(2).NE.1D0.AND.NHEL(2,I).EQ.INT(SIGN(1D0,POL(2)))) $ THEN T=T*ABS(POL(2)) @@ -272,7 +268,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -301,35 +298,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
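C Note: a helicity I is promoted to "good" when |TS(I)| exceeds the
C fraction LIMHEL/NCOMB of the accumulated sum ANS, i.e. when its
C contribution is non-negligible against an equal share over all
C NCOMB combinations. A minimal sketch of that criterion as a pure
C function (the name IS_GOOD_HEL is illustrative, not generated
C code; LIMHEL is assumed to arrive via run.inc):

      LOGICAL FUNCTION IS_GOOD_HEL(T, ANS, LIMHEL, NCOMB)
      IMPLICIT NONE
      DOUBLE PRECISION T, ANS, LIMHEL
      INTEGER NCOMB
C     keep the helicity if it carries more than LIMHEL of an
C     equal 1/NCOMB share of the summed matrix element
      IS_GOOD_HEL = DABS(T) .GT. ANS*LIMHEL/NCOMB
      END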
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -342,7 +337,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) - ! handling only one beam polarization here. Second beam can be handle via the pdf. +C handling only one beam polarization here. Second beam can be +C handle via the pdf. IF(POL(2).NE.1D0.AND.NHEL(2,I).EQ.INT(SIGN(1D0,POL(2)))) THEN T=T*ABS(POL(2)) ELSE IF(POL(2).NE.1D0)THEN @@ -407,7 +403,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/CPPProcess.cc index 7722a4930f..37d6632f2d 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/CPPProcess.cc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/CPPProcess.h b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/CPPProcess.h index c9ef36c99e..7ecf7fb4f2 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/CPPProcess.h +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/auto_dsig.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/auto_dsig.f index 6c6b6e46be..2fb389e215 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/auto_dsig.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1135,11 +1135,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=96) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1149,32 +1150,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=96) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=96) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/auto_dsig1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/auto_dsig1.f index 4aaf959523..89f373b168 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/auto_dsig1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -102,6 +102,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -222,7 +224,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -287,7 +289,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -326,9 +328,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -344,6 +347,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -499,11 +504,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -603,9 +603,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -798,3 +800,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/driver.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/driver.f index 9ff47ec544..d8518f17f7 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/driver.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
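C Note: init_mode is shared between DRIVER, get_user_params and
C SMATRIX1 through the common block /to_determine_zero_hel/; the new
C declaration at the top of DRIVER is what lets the scale-fixing
C block there test the flag after get_user_params has set it. The
C declaration pair must be identical in every routine that uses it:

      logical init_mode
      common /to_determine_zero_hel/init_mode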
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/matrix1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/matrix1.f index 9e94386d22..57c105fffc 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/matrix1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gdx_ttxwpux/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -24,6 +24,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=96) INTEGER NGRAPHS @@ -47,8 +49,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -57,26 +59,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -87,7 +86,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -97,26 +95,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,6) /-1,-1,-1, 1,-1, 1/ DATA (NHEL(I, 2),I=1,6) /-1,-1,-1, 1,-1,-1/ DATA (NHEL(I, 3),I=1,6) /-1,-1,-1, 1, 0, 1/ @@ -224,8 +221,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -234,11 +230,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=4 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=4 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -248,16 +244,16 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) - ! handling only one beam polarization here. Second beam can be handle via the pdf. +C handling only one beam polarization here. Second beam can +C be handle via the pdf. IF(POL(2).NE.1D0.AND.NHEL(2,I).EQ.INT(SIGN(1D0,POL(2)))) $ THEN T=T*ABS(POL(2)) @@ -272,7 +268,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -301,35 +298,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
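C Note: GOODHEL and NTRY live in COMMON/BLOCK_GOODHEL/, so the move
C from a hard-coded second dimension of 2 to MAXSPROC (defined in
C maxamps.inc) has to be made identically here and in the
C WRITE_GOOD_HEL, READ_GOOD_HEL and INIT_GOOD_HEL helpers of
C auto_dsig.f: the Fortran standard requires a named common block to
C have the same size in every scoping unit. The matching pattern
C used throughout this diff:

      LOGICAL GOODHEL(NCOMB, MAXSPROC)
      INTEGER NTRY(MAXSPROC)
      COMMON/BLOCK_GOODHEL/NTRY,GOODHEL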
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -342,7 +337,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) - ! handling only one beam polarization here. Second beam can be handle via the pdf. +C handling only one beam polarization here. Second beam can be +C handle via the pdf. IF(POL(2).NE.1D0.AND.NHEL(2,I).EQ.INT(SIGN(1D0,POL(2)))) THEN T=T*ABS(POL(2)) ELSE IF(POL(2).NE.1D0)THEN @@ -407,7 +403,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/CPPProcess.cc index 9740f08230..f0626d1178 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/CPPProcess.cc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/CPPProcess.h b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/CPPProcess.h index 925ea70c59..38a61439fa 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/CPPProcess.h +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/auto_dsig.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/auto_dsig.f index 10677eec81..529ea9ac57 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/auto_dsig.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1135,11 +1135,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=96) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1149,32 +1150,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=96) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=96) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/auto_dsig1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/auto_dsig1.f index 635d50d915..c05076fb2e 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/auto_dsig1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -102,6 +102,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -222,7 +224,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -287,7 +289,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -326,9 +328,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -344,6 +347,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -499,11 +504,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -603,9 +603,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -798,3 +800,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/driver.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/driver.f index 9ff47ec544..d8518f17f7 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/driver.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
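C Note: several hunks in this diff replace element-by-element DO
C loops with Fortran 90 whole-array assignments (GOODHEL(:,:) =
C .FALSE., NTRY(:) = MAXTRIES + 1), which track a changing MAXSPROC
C automatically. A standalone sketch of the idiom, with illustrative
C parameter values:

      PROGRAM ARRAY_INIT
      IMPLICIT NONE
      INTEGER, PARAMETER :: NCOMB = 96, MAXSPROC = 2
      LOGICAL GOODHEL(NCOMB, MAXSPROC)
      INTEGER NTRY(MAXSPROC)
C     one assignment initialises the whole array, whatever its shape
      GOODHEL(:,:) = .FALSE.
      NTRY(:) = 0
      PRINT *, COUNT(GOODHEL), NTRY
      END PROGRAM ARRAY_INIT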
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/matrix1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/matrix1.f index f58f1a225f..f61d71c2bc 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/matrix1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gu_ttxwpd/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -24,6 +24,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=96) INTEGER NGRAPHS @@ -47,8 +49,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -57,26 +59,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -87,7 +86,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -97,26 +95,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,6) /-1, 1,-1, 1,-1,-1/ DATA (NHEL(I, 2),I=1,6) /-1, 1,-1, 1,-1, 1/ DATA (NHEL(I, 3),I=1,6) /-1, 1,-1, 1, 0,-1/ @@ -224,8 +221,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -234,11 +230,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=4 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=4 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -248,16 +244,16 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) - ! handling only one beam polarization here. Second beam can be handle via the pdf. +C handling only one beam polarization here. Second beam can +C be handle via the pdf. IF(POL(2).NE.1D0.AND.NHEL(2,I).EQ.INT(SIGN(1D0,POL(2)))) $ THEN T=T*ABS(POL(2)) @@ -272,7 +268,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -301,35 +298,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
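C Note: with the mirror bookkeeping gone (NTRY, NGOOD, ISHEL and
C NGOODHEL all lose their IMIRROR index in this diff), the counters
C below always address process slot 1. The explicit counting loop
C over GOODHEL could equivalently use the F90 COUNT intrinsic; an
C alternative sketch, not what the generator emits:

      NGOODHEL = COUNT(GOODHEL(:,1))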
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -342,7 +337,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) - ! handling only one beam polarization here. Second beam can be handle via the pdf. +C handling only one beam polarization here. Second beam can be +C handle via the pdf. IF(POL(2).NE.1D0.AND.NHEL(2,I).EQ.INT(SIGN(1D0,POL(2)))) THEN T=T*ABS(POL(2)) ELSE IF(POL(2).NE.1D0)THEN @@ -407,7 +403,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/CPPProcess.cc index 4790c0ae01..71184b0eda 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/CPPProcess.cc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/CPPProcess.h b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/CPPProcess.h index 18d9b3a4bc..17ec79ccb1 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/CPPProcess.h +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/auto_dsig.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/auto_dsig.f index f4bc3efd94..d0608f110f 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/auto_dsig.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1135,11 +1135,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=96) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1149,32 +1150,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=96) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=96) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/auto_dsig1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/auto_dsig1.f index 0679fc876b..34376363c9 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/auto_dsig1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -102,6 +102,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -222,7 +224,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -287,7 +289,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -326,9 +328,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -344,6 +347,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -499,11 +504,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -603,9 +603,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -798,3 +800,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/driver.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/driver.f index 9ff47ec544..d8518f17f7 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/driver.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
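C Note: the two includes added to get_user_params are order
C sensitive: run.inc presumably declares per-event arrays sized by
C VECSIZE_MEMMAX, which vector.inc defines, so vector.inc must come
C first (SMATRIX1 gets the same reordering, with its vector.inc
C include moved above run.inc). The required pattern:

      include 'vector.inc'   ! defines VECSIZE_MEMMAX
      include 'run.inc'      ! uses VECSIZE_MEMMAX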
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/matrix1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/matrix1.f index 6ce9435143..dfac31811b 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/matrix1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_gux_ttxwmdx/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -24,6 +24,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=96) INTEGER NGRAPHS @@ -47,8 +49,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -57,26 +59,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -87,7 +86,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -97,26 +95,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,6) /-1,-1,-1, 1, 1, 1/ DATA (NHEL(I, 2),I=1,6) /-1,-1,-1, 1, 1,-1/ DATA (NHEL(I, 3),I=1,6) /-1,-1,-1, 1, 0, 1/ @@ -224,8 +221,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -234,11 +230,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=4 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=4 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -248,16 +244,16 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) - ! handling only one beam polarization here. Second beam can be handle via the pdf. +C handling only one beam polarization here. Second beam can +C be handle via the pdf. IF(POL(2).NE.1D0.AND.NHEL(2,I).EQ.INT(SIGN(1D0,POL(2)))) $ THEN T=T*ABS(POL(2)) @@ -272,7 +268,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -301,35 +298,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
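C Note: the strengthened guard above, NTRY(1).EQ.(MAXTRIES+1)
C .AND. DS_GET_DIM_STATUS('Helicity').NE.-1, now skips
C RESET_CUMULATIVE_VARIABLE when the DiscreteSampler has no
C 'Helicity' dimension registered (a status of -1 apparently marks
C an absent dimension); this seems to supersede the FBRIDGE_MODE
C workaround that the same diff comments out in SMATRIX1_MULTI.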
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -342,7 +337,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) - ! handling only one beam polarization here. Second beam can be handle via the pdf. +C handling only one beam polarization here. Second beam can be +C handle via the pdf. IF(POL(2).NE.1D0.AND.NHEL(2,I).EQ.INT(SIGN(1D0,POL(2)))) THEN T=T*ABS(POL(2)) ELSE IF(POL(2).NE.1D0)THEN @@ -407,7 +403,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/CPPProcess.cc b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/CPPProcess.cc index 03315c9b9c..c2a3e42ac6 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/CPPProcess.cc +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/CPPProcess.h b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/CPPProcess.h index f87264b065..ea33cce2b1 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/CPPProcess.h +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/auto_dsig.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/auto_dsig.f index 018e3a69ea..3ab40bf574 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/auto_dsig.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1135,11 +1135,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=96) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1149,32 +1150,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=96) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=96) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/auto_dsig1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/auto_dsig1.f index 05cce0a465..f465e93055 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/auto_dsig1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -102,6 +102,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -223,7 +225,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -288,7 +290,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -327,9 +329,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -345,6 +348,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -502,11 +507,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -606,9 +606,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -801,3 +803,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/driver.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/driver.f index 9ff47ec544..d8518f17f7 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/driver.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
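For orientation, the rule implemented by the rewritten helicity loop in the matrix1.f hunks above (and protected by the frozen-scale init_mode branch in driver.f) can be summarised in Python. This is a minimal illustrative sketch added as annotation, not generated code; ts, ans, limhel and ncomb mirror the Fortran TS, ANS, LIMHEL and NCOMB, and good_helicities is a hypothetical helper.

    def good_helicities(ts, ans, limhel):
        """Sketch of the GOODHEL filter: helicity i is kept once its
        contribution |TS(i)| exceeds the fraction LIMHEL/NCOMB of ANS."""
        ncomb = len(ts)
        return [i for i, t in enumerate(ts, start=1)
                if abs(t) > ans * limhel / ncomb]

    # Example with limhel = 1e-8 (the new run_card default added later in
    # this diff): contributions below ANS*LIMHEL/NCOMB are treated as zero.
    print(good_helicities([0.0, 1e-12, 0.3], ans=0.3, limhel=1e-8))  # -> [3]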
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/matrix1.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/matrix1.f index f441ba2a6f..3bcf9d58c1 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/matrix1.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/P1_udx_ttxwpg/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -24,6 +24,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=96) INTEGER NGRAPHS @@ -47,8 +49,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -57,26 +59,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -87,7 +86,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -97,26 +95,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,6) / 1,-1,-1, 1,-1,-1/ DATA (NHEL(I, 2),I=1,6) / 1,-1,-1, 1,-1, 1/ DATA (NHEL(I, 3),I=1,6) / 1,-1,-1, 1, 0,-1/ @@ -224,8 +221,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -234,11 +230,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=4 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=4 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -248,16 +244,16 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) - ! handling only one beam polarization here. Second beam can be handle via the pdf. +C handling only one beam polarization here. Second beam can +C be handle via the pdf. IF(POL(2).NE.1D0.AND.NHEL(2,I).EQ.INT(SIGN(1D0,POL(2)))) $ THEN T=T*ABS(POL(2)) @@ -272,7 +268,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -301,35 +298,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -342,7 +337,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) - ! handling only one beam polarization here. Second beam can be handle via the pdf. +C handling only one beam polarization here. Second beam can be +C handle via the pdf. IF(POL(2).NE.1D0.AND.NHEL(2,I).EQ.INT(SIGN(1D0,POL(2)))) THEN T=T*ABS(POL(2)) ELSE IF(POL(2).NE.1D0)THEN @@ -407,7 +403,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cluster.f b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cluster.f index 649e46f4e9..b8995283ed 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cluster.f +++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/cluster.f @@ -552,6 +552,8 @@ logical function cluster(p, ivec) if (btest(mlevel,1)) $ write (*,*)'New event' + iwin = 0 + jwin = 0 cluster=.false. clustered=.false. 
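The cluster.f change just above initialises the winner indices iwin and jwin to zero at the start of each event, and a matching guard added in the next hunk aborts with 'stop 21' if no clustering pair was ever selected. The same defensive pattern in Python, added here only as annotation (pick_clustering_winner and its dictionary argument are hypothetical, not part of the generated code):

    def pick_clustering_winner(measures):
        """Return the index pair with the smallest clustering measure.

        measures maps (i, j) pairs to positive values; if it is empty the
        winner stays (0, 0) and we abort, mirroring 'stop 21' in cluster.f."""
        iwin = jwin = 0
        best = float('inf')
        for (i, j), m in measures.items():
            if m < best:
                iwin, jwin, best = i, j, m
        if iwin == 0 or jwin == 0:
            raise SystemExit(21)  # an uninitialised winner would corrupt the clustering
        return iwin, jwin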
      do i=0,3
@@ -663,7 +665,8 @@ logical function cluster(p, ivec)
 c     initialize graph storage
       igraphs(0)=0
       nleft=nexternal
-c     cluster
+c     cluster
+      if (iwin.eq.0.or.jwin.eq.0) stop 21
       do n=1,nexternal-2
 c     combine winner
       imocl(n)=imap(iwin,2)+imap(jwin,2)
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/proc_characteristics b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/proc_characteristics
index a5b0c16e13..ee4c9d4276 100644
--- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/proc_characteristics
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/proc_characteristics
@@ -17,6 +17,8 @@
     splitting_types = []
     perturbation_order = []
     limitations = []
+    ew_sudakov = False
     hel_recycling = False
     single_color = False
     nlo_mixed_expansion = True
+    gauge = unitary
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/refine.sh b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/refine.sh
index afb9b99ad1..b46170ba23 100644
--- a/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/refine.sh
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/SubProcesses/refine.sh
@@ -57,7 +57,11 @@ j=%(directory)s
 for((try=1;try<=16;try+=1));
 do
 if [ "$keeplog" = true ] ; then
+    if [[ -e ../madevent ]];then
     ../madevent 2>&1 >> $k <input_app.txt
 [...] 2>&1 >> $k <input_app.txt
 [...] 2>&1 >> log.txt <input_app.txt
 [...] 2>&1 >> log.txt <input_app.txt
 [...] 2>&1 >> $k <input_app.txt
 [...]
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/banner.py b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/banner.py
--- a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/banner.py
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/banner.py
@@ [...]
-        pat_begin=re.compile('<(?P<name>\w*)>')
-        pat_end=re.compile('</(?P<name>\w*)>')
+        pat_begin=re.compile(r'<(?P<name>\w*)>')
+        pat_end=re.compile(r'</(?P<name>\w*)>')
 
         tag_to_file={'slha':'param_card.dat',
                      'mgruncard':'run_card.dat',
@@ -319,7 +319,7 @@ def check_pid(self, pid2label):
 
     def get_lha_strategy(self):
         """get the lha_strategy: how the weight have to be handle by the shower"""
-        if not self["init"]:
+        if "init" not in self or not self["init"]:
             raise Exception("No init block define")
 
         data = self["init"].split('\n')[0].split()
@@ -537,7 +537,8 @@ def charge_card(self, tag):
             self.param_card = param_card_reader.ParamCard(param_card)
             return self.param_card
         elif tag == 'mgruncard':
-            self.run_card = RunCard(self[tag], unknown_warning=False)
+            with misc.TMP_variable(RunCard, 'allow_scan', True):
+                self.run_card = RunCard(self[tag], consistency=False, unknow_warning=False)
             return self.run_card
         elif tag == 'mg5proccard':
             proc_card = self[tag].split('\n')
@@ -976,6 +977,8 @@
 class ConfigFile(dict):
     """ a class for storing/dealing with input file.
     """
 
+    allow_scan = False
+
     def __init__(self, finput=None, **opt):
         """initialize a new instance. input can be an instance of MadLoopParam,
         a file, a path to a file, or simply Nothing"""
@@ -993,6 +996,7 @@ def __init__(self, finput=None, **opt):
         # Initialize it with all the default value
         self.user_set = set()
         self.auto_set = set()
+        self.scan_set = {} #key -> type of list (int/float/bool/str/... for scan
         self.system_only = set()
         self.lower_to_case = {}
         self.list_parameter = {} #key -> type of list (int/float/bool/str/...
@@ -1109,6 +1113,8 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False):
         #1. check if the parameter is set to auto -> pass it to special
         if lower_name in self:
             targettype = type(dict.__getitem__(self, lower_name))
+            if lower_name in self.scan_set:
+                targettype = self.scan_set[lower_name]
             if targettype != str and isinstance(value, str) and value.lower() == 'auto':
                 self.auto_set.add(lower_name)
                 if lower_name in self.user_set:
@@ -1118,13 +1124,26 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False):
             return
         elif lower_name in self.auto_set:
             self.auto_set.remove(lower_name)
-
+
+
+        #1.
check if the parameter is set to auto -> pass it to special + scan_targettype = None + if self.allow_scan and isinstance(value, str) and value.strip().startswith('scan'): + if lower_name in self.user_set: + self.user_set.remove(lower_name) + self.scan_set[lower_name] = type(self[lower_name]) + dict.__setitem__(self, lower_name, value) + #keep old value. + self.post_set(lower_name,value, change_userdefine, raiseerror) + return + elif lower_name in self.scan_set: + scan_targettype = self.scan_set[lower_name] + del self.scan_set[lower_name] + # 2. Find the type of the attribute that we want if lower_name in self.list_parameter: targettype = self.list_parameter[lower_name] - - if isinstance(value, str): # split for each comma/space value = value.strip() @@ -1248,8 +1267,11 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): if change_userdefine: self.user_set.add(lower_name) return self.post_set(lower_name, None, change_userdefine, raiseerror) - elif name in self: - targettype = type(self[name]) + elif name in self: + if scan_targettype: + targettype = targettype + else: + targettype = type(self[name]) else: logger.debug('Trying to add argument %s in %s. ' % (name, self.__class__.__name__) +\ 'This argument is not defined by default. Please consider adding it.') @@ -1262,7 +1284,7 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): if change_userdefine: self.user_set.add(lower_name) return self.post_set(lower_name, None, change_userdefine, raiseerror) - + value = self.format_variable(value, targettype, name=name) #check that the value is allowed: if lower_name in self.allowed_value and '*' not in self.allowed_value[lower_name]: @@ -1444,7 +1466,7 @@ def format_variable(value, targettype, name="unknown"): value =int(value[:-1]) * convert[value[-1]] elif '/' in value or '*' in value: try: - split = re.split('(\*|/)',value) + split = re.split(r'(\*|/)',value) v = float(split[0]) for i in range((len(split)//2)): if split[2*i+1] == '*': @@ -1482,7 +1504,7 @@ def format_variable(value, targettype, name="unknown"): value = float(value) except ValueError: try: - split = re.split('(\*|/)',value) + split = re.split(r'(\*|/)',value) v = float(split[0]) for i in range((len(split)//2)): if split[2*i+1] == '*': @@ -1491,7 +1513,10 @@ def format_variable(value, targettype, name="unknown"): v /= float(split[2*i+2]) except: v=0 - raise InvalidCmd("%s can not be mapped to a float" % value) + if "scan" in value: + raise InvalidCmd("%s is not supported here. Note that scan command can not be present simultaneously in the run_card and param_card." 
% value) + else: + raise InvalidCmd("%s can not be mapped to a float" % value) finally: value = v else: @@ -1737,10 +1762,12 @@ def default_setup(self): self.add_param('splitting_types',[], typelist=str) self.add_param('perturbation_order', [], typelist=str) self.add_param('limitations', [], typelist=str) + self.add_param('ew_sudakov', False) self.add_param('hel_recycling', False) self.add_param('single_color', True) self.add_param('nlo_mixed_expansion', True) - + self.add_param('gauge', 'U') + def read(self, finput): """Read the input file, this can be a path to a file, a file object, a str with the content of the file.""" @@ -3104,7 +3131,7 @@ def write(self, output_file, template=None, python_template=False, # do not write hidden parameter not hidden for this template # if python_template: - written = written.union(set(re.findall('\%\((\w*)\)s', open(template,'r').read(), re.M))) + written = written.union(set(re.findall(r'\%\((\w*)\)s', open(template,'r').read(), re.M))) to_write = to_write.union(set(self.hidden_param)) to_write = to_write.difference(written) @@ -3157,7 +3184,6 @@ def get_value_from_include(self, path, list_of_params, output_dir): if path does not exists return the current value in self for all parameter""" #WARNING DOES NOT HANDLE LIST/DICT so far - misc.sprint(output_dir, path) # handle case where file is missing if not os.path.exists(pjoin(output_dir,path)): misc.sprint("include file not existing", pjoin(output_dir,path)) @@ -3259,7 +3285,7 @@ def edit_dummy_fct_from_file(self, filelist, outdir): text = open(path,'r').read() #misc.sprint(text) f77_type = ['real*8', 'integer', 'double precision', 'logical'] - pattern = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ + pattern = re.compile(r'^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ % {'type':'|'.join(f77_type)}, re.I+re.M) for fct in pattern.findall(text): fsock = file_writers.FortranWriter(tmp,'w') @@ -3287,6 +3313,7 @@ def edit_dummy_fct_from_file(self, filelist, outdir): starttext = open(pjoin(outdir, path+'.orig')).read() fsock.remove_routine(starttext, to_mod[path][0]) for text in to_mod[path][1]: + text = self.retro_compatible_custom_fct(text) fsock.writelines(text) fsock.close() if not filecmp.cmp(pjoin(outdir, path), pjoin(outdir, path+'.tmp')): @@ -3303,7 +3330,33 @@ def edit_dummy_fct_from_file(self, filelist, outdir): files.mv(pjoin(outdir,path+'.orig'), pjoin(outdir, path)) + @staticmethod + def retro_compatible_custom_fct(lines, mode=None): + f77_type = ['real*8', 'integer', 'double precision', 'logical'] + function_pat = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ + % {'type':'|'.join(f77_type)}, re.I+re.M) + include_pat = re.compile(r"\s+include\s+[\'\"]([\w\./]*)") + + assert isinstance(lines, list) + sol = [] + + if mode is None or 'vector.inc' in mode: + search = True + for i,line in enumerate(lines[:]): + if search and re.search(include_pat, line): + name = re.findall(include_pat, line)[0] + misc.sprint('DETECTED INCLUDE', name) + if 'vector.inc' in name: + search = False + if 'run.inc' in name: + sol.append(" include 'vector.inc'") + search = False + sol.append(line) + if re.search(function_pat, line): + misc.sprint("DETECTED FCT") + search = True + return sol def guess_entry_fromname(self, name, value): """ @@ -3346,7 +3399,7 @@ def update_typelist(value, name, opts): #handle metadata opts = {} forced_opts = [] - for key,val in re.findall("\<(?P[_\-\w]+)\=(?P[^>]*)\>", str(name)): + for key,val in 
re.findall(r"\<(?P[_\-\w]+)\=(?P[^>]*)\>", str(name)): forced_opts.append(key) if val in ['True', 'False']: opts[key] = eval(val) @@ -3681,11 +3734,22 @@ def write_autodef(self, output_dir, output_file=None): out = ["%s\n" %l for l in out] fsock.writelines(out) - @staticmethod - def get_idbmup(lpp): + def get_idbmup(self, lpp, beam=1): """return the particle colliding pdg code""" if lpp in (1,2, -1,-2): - return math.copysign(2212, lpp) + target = 2212 + if 'nb_proton1' in self: + nbp = self['nb_proton%s' % beam] + nbn = self['nb_neutron%s' % beam] + if nbp == 1 and nbn ==0: + target = 2212 + elif nbp==0 and nbn ==1: + target = 2112 + else: + target = 1000000000 + target += 10 * (nbp+nbn) + target += 10000 * nbp + return math.copysign(target, lpp) elif lpp in (3,-3): return math.copysign(11, lpp) elif lpp in (4,-4): @@ -3701,8 +3765,8 @@ def get_banner_init_information(self): the first line of the block of the lhe file.""" output = {} - output["idbmup1"] = self.get_idbmup(self['lpp1']) - output["idbmup2"] = self.get_idbmup(self['lpp2']) + output["idbmup1"] = self.get_idbmup(self['lpp1'], beam=1) + output["idbmup2"] = self.get_idbmup(self['lpp2'], beam=2) output["ebmup1"] = self["ebeam1"] output["ebmup2"] = self["ebeam2"] output["pdfgup1"] = 0 @@ -3959,7 +4023,8 @@ def check_validity(self, card): dict.__setitem__(card, 'pdlabel1', card['pdlabel']) dict.__setitem__(card, 'pdlabel2', card['pdlabel']) - if abs(card['lpp1']) == 1 == abs(card['lpp2']) and card['pdlabel1'] != card['pdlabel2']: + if isinstance(card['lpp1'],int) and isinstance(card['lpp2'],int) and \ + abs(card['lpp1']) == 1 == abs(card['lpp2']) and card['pdlabel1'] != card['pdlabel2']: raise InvalidRunCard("Assymetric beam pdf not supported for proton-proton collision") def status(self, card): @@ -4156,12 +4221,16 @@ def default_setup(self): self.add_param('frame_id', 6, system=True) self.add_param("event_norm", "average", allowed=['sum','average', 'unity'], include=False, sys_default='sum', hidden=True) + self.add_param("keep_log", "normal", include=False, hidden=True, + comment="none: all log send to /dev/null.\n minimal: keep only log for survey of the last run.\n normal: keep only log for survey of all run. \n debug: keep all log (survey and refine)", + allowed=['none', 'minimal', 'normal', 'debug']) #cut self.add_param("auto_ptj_mjj", True, hidden=True) self.add_param("bwcutoff", 15.0) self.add_param("cut_decays", False, cut='d') self.add_param('dsqrt_shat',0., cut=True) self.add_param("nhel", 0, include=False) + self.add_param("limhel", 1e-8, hidden=True, comment="threshold to determine if an helicity contributes when not MC over helicity.") #pt cut self.add_param("ptj", 20.0, cut='j') self.add_param("ptb", 0.0, cut='b') @@ -4842,7 +4911,7 @@ def create_default_for_process(self, proc_characteristic, history, proc_def): # here pick strategy 2 if only one QCD color flow # and for pure multi-jet case jet_id = [21] + list(range(1, self['maxjetflavor']+1)) - if proc_characteristic['single_color']: + if proc_characteristic['gauge'] != 'FD' and proc_characteristic['single_color']: self['sde_strategy'] = 2 #for pure lepton final state go back to sde_strategy=1 pure_lepton=True @@ -5741,9 +5810,10 @@ def check_validity(self): # check that ebeam is bigger than the proton mass. 
        for i in [1,2]:
-            if self['lpp%s' % i ] not in [1,2]:
+            # do not check for proton mass if not proton PDF (or when scan initialization)
+            if self['lpp%s' % i ] not in [1,2] or isinstance(self['ebeam%i' % i], str):
                 continue
-
+
             if self['ebeam%i' % i] < 0.938:
                 if self['ebeam%i' %i] == 0:
                     logger.warning("At-rest proton mode set: energy beam set to 0.938 GeV")
@@ -6193,3 +6263,181 @@ def log_and_update(self, banner, card, par, v):
         xcard = banner.charge_card(card)
         xcard[par[0]].param_dict[(par[1],)].value = v
         xcard.write(os.path.join(self.me_dir, 'Cards', '%s.dat' % card))
+
+
+
+
+class RunCardIterator(object):
+    """A class keeping track of the scan: flag in the run_card and
+       having an __iter__() function to scan over all the points of the scan.
+    """
+
+    logging = True
+    def __init__(self, input_path=None):
+        with misc.TMP_variable(RunCard, 'allow_scan', True):
+            self.run_card = RunCard(input_path, consistency=False)
+        self.run_card.allow_scan = True
+
+        self.itertag = [] #all the current value use
+        self.cross = []   # keep track of all the cross-section computed
+        self.param_order = []
+
+    def __iter__(self):
+        """generate the next param_card (in a abstract way) related to the scan.
+           Technically this generates only the generator."""
+
+        if hasattr(self, 'iterator'):
+            return self.iterator
+        self.iterator = self.iterate()
+        return self.iterator
+
+    def write(self, path):
+        self.__iter__.write(path)
+
+    def next(self, autostart=False):
+        """call the next iteration value"""
+        try:
+            iterator = self.iterator
+        except:
+            if autostart:
+                iterator = self.__iter__()
+            else:
+                raise
+        try:
+            out = next(iterator)
+        except StopIteration:
+            del self.iterator
+            raise
+        return out
+
+    def iterate(self):
+        """create the actual generator"""
+        all_iterators = {} # dictionary of key -> block of object to scan [([param, [values]), ...]
+        pattern = re.compile(r'''scan\s*(?P<id>\d*)\s*:\s*(?P<value>[^#]*)''', re.I)
+
+        # fill all_iterators with the run_card information
+        for name in self.run_card.scan_set:
+            value = self.run_card[name]
+            try:
+                key, def_list = pattern.findall(value)[0]
+            except Exception as error:
+                misc.sprint(error)
+                raise Exception("Fail to handle scanning tag in run_card: Please check that the syntax is valid")
+            if key == '':
+                key = -1 * len(all_iterators)
+            if key not in all_iterators:
+                all_iterators[key] = []
+            try:
+                all_iterators[key].append( (name, eval(def_list)))
+            except SyntaxError as error:
+                raise Exception("Fail to handle your scan definition. Please check your syntax:\n entry: %s \n Error reported: %s" %(def_list, error))
+
+        #prepare to keep track of parameter changing for the report
+        keys = list(all_iterators.keys()) # need to fix an order for the scan
+        #store the type of parameter
+        for key in keys:
+            for param, values in all_iterators[key]:
+                self.param_order.append("run_card#%s" % (param))
+
+        # do the loop
+        lengths = [list(range(len(all_iterators[key][0][1]))) for key in keys]
+        from functools import reduce
+        total = reduce((lambda x, y: x * y),[len(x) for x in lengths])
+        for i,positions in enumerate(itertools.product(*lengths)):
+            self.itertag = []
+            if self.logging:
+                logger.info("Create the next run_card in the scan definition (%s/%s) " %( i+1, total), '$MG:BOLD')
+            for i, pos in enumerate(positions):
+                key = keys[i]
+                for param, values in all_iterators[key]:
+                    # assign the value in the card.
+ self.run_card[param] = values[pos] + self.itertag.append(values[pos]) + if self.logging: + logger.info("change parameter %s to %s", \ + param, values[pos]) + + + # retrun the current param_card up to next iteration + yield self.run_card + + + def store_entry(self, run_name, cross, error=None, run_card_path=None): + """store the value of the cross-section""" + + if isinstance(cross, dict): + info = dict(cross) + info.update({'bench' : self.itertag, 'run_name': run_name}) + self.cross.append(info) + else: + if error is None: + self.cross.append({'bench' : self.itertag, 'run_name': run_name, 'cross(pb)':cross}) + else: + self.cross.append({'bench' : self.itertag, 'run_name': run_name, 'cross(pb)':cross, 'error(pb)':error}) + + + def write_summary(self, path, order=None, lastline=False, nbcol=20): + """ """ + + if path: + ff = open(path, 'w') + path_events = path.rsplit("/", 1)[0] + #identCard = open(pjoin(path.rsplit("/", 2)[0], "Cards", "ident_card.dat")) + #identLines = identCard.readlines() + #identCard.close() + else: + ff = StringIO.StringIO() + if order: + keys = order + else: + keys = list(self.cross[0].keys()) + if 'bench' in keys: keys.remove('bench') + if 'run_name' in keys: keys.remove('run_name') + keys.sort() + if 'cross(pb)' in keys: + keys.remove('cross(pb)') + keys.append('cross(pb)') + if 'error(pb)' in keys: + keys.remove('error(pb)') + keys.append('error(pb)') + + formatting = "#%s%s%s\n" %('%%-%is ' % (nbcol-1), ('%%-%is ' % (nbcol))* len(self.param_order), + ('%%-%is ' % (nbcol))* len(keys)) + # header + if not lastline: + ff.write(formatting % tuple(['run_name'] + self.param_order + keys)) + formatting = "%s%s%s\n" %('%%-%is ' % (nbcol), ('%%-%ie ' % (nbcol))* len(self.param_order), + ('%%-%ie ' % (nbcol))* len(keys)) + + if not lastline: + to_print = self.cross + else: + to_print = self.cross[-1:] + for info in to_print: + name = info['run_name'] + bench = info['bench'] + data = [] + for k in keys: + if k in info: + data.append(info[k]) + else: + data.append(0.) 
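RunCardIterator, defined above, is what expands a 'scan'-tagged run_card value into one run per point. A hedged usage sketch follows; the card fragment and the expand_scan helper are illustrative assumptions, but the regular expression is the one used in iterate():

    import re

    # In the run_card, a value such as
    #     scan1:[6500., 7000.] = ebeam1
    # declares a scan; entries sharing the same numeric id vary together.
    pattern = re.compile(r'''scan\s*(?P<id>\d*)\s*:\s*(?P<value>[^#]*)''', re.I)

    def expand_scan(tag):
        """Split 'scanN:[...]' into its grouping id and the list of points."""
        key, values = pattern.findall(tag)[0]
        return key, eval(values)   # iterate() evaluates the list the same way

    print(expand_scan('scan1: [6500., 7000.]'))   # ('1', [6500.0, 7000.0])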
+ ff.write(formatting % tuple([name] + bench + data)) + ff_single = open(pjoin(path_events, name, "params.dat"), "w") + for i_bench in range(0, len(bench)): + ff_single.write(self.param_order[i_bench] + " = " + str(bench[i_bench]) +"\n") + ff_single.close() + + if not path: + return ff.getvalue() + + + def get_next_name(self, run_name): + """returns a smart name for the next run""" + + if '_' in run_name: + name, value = run_name.rsplit('_',1) + if value.isdigit(): + return '%s_%02i' % (name, float(value)+1) + # no valid '_' in the name + return '%s_scan_02' % run_name diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/check_param_card.py b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/check_param_card.py index 71089d7480..bc785b5de6 100755 --- a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/check_param_card.py +++ b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/check_param_card.py @@ -649,7 +649,7 @@ def write_inc_file(self, outpath, identpath, default, need_mp=False): #check if we need to write the value of scale for some block if os.path.exists(input_inc): text = open(input_inc).read() - scales = list(set(re.findall('mdl__(\w*)__scale', text, re.I))) + scales = list(set(re.findall(r'mdl__(\w*)__scale', text, re.I))) else: scales = [] @@ -1000,10 +1000,12 @@ def iterate(self): self.param_order.append("%s#%s" % (param.lhablock, '_'.join(repr(i) for i in param.lhacode))) # do the loop lengths = [list(range(len(all_iterators[key][0][1]))) for key in keys] - for positions in itertools.product(*lengths): + from functools import reduce + total = reduce((lambda x, y: x * y),[len(x) for x in lengths]) + for i,positions in enumerate(itertools.product(*lengths)): self.itertag = [] if self.logging: - logger.info("Create the next param_card in the scan definition", '$MG:BOLD') + logger.info("Create the next param_card in the scan definition (%s/%s)" % (i+1,total), '$MG:BOLD') for i, pos in enumerate(positions): key = keys[i] for param, values in all_iterators[key]: diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/cluster.py b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/cluster.py index 9a893f630d..1ad860e04f 100755 --- a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/cluster.py +++ b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/cluster.py @@ -646,7 +646,10 @@ def worker(self): if os.path.exists(exe) and not exe.startswith('/'): exe = './' + exe if isinstance(opt['stdout'],str): - opt['stdout'] = open(opt['stdout'],'w') + if opt['stdout'] == '/dev/null': + opt['stdout'] = os.open(os.devnull, os.O_RDWR) + else: + opt['stdout'] = open(opt['stdout'],'w') if opt['stderr'] == None: opt['stderr'] = subprocess.STDOUT if arg: @@ -671,11 +674,12 @@ def worker(self): self.pids.put(pid) # the function should return 0 if everything is fine # the error message otherwise - returncode = exe(*arg, **opt) - if returncode != 0: - logger.warning("fct %s does not return 0. Stopping the code in a clean way. The error was:\n%s", exe, returncode) + try: + returncode = exe(*arg, **opt) + except Exception as error: + #logger.warning("fct %s does not return 0. Stopping the code in a clean way. 
The error was:\n%s", exe, returncode) self.stoprequest.set() - self.remove("fct %s does not return 0:\n %s" % (exe, returncode)) + self.remove("fct %s does raise %s\n %s" % (exe, error)) except Exception as error: self.fail_msg = sys.exc_info() logger.warning(str(error)) @@ -700,7 +704,7 @@ def worker(self): def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, - log=None, required_output=[], nb_submit=0): + log=None, required_output=[], nb_submit=0, python_opts={}): """submit a job on multicore machine""" # open threads if needed @@ -720,7 +724,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, return tag else: # python function - self.queue.put((tag, prog, argument, {})) + self.queue.put((tag, prog, argument, python_opts)) self.submitted.put(1) return tag @@ -908,6 +912,10 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None else: requirement = '' + if 'cluster_walltime' in self.options and self.options['cluster_walltime']\ + and self.options['cluster_walltime'] != 'None': + requirement+='\n MaxRuntime = %s' % self.options['cluster_walltime'] + if cwd is None: cwd = os.getcwd() if stdout is None: @@ -936,7 +944,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None #Submitting job(s). #Logging submit event(s). #1 job(s) submitted to cluster 2253622. - pat = re.compile("submitted to cluster (\d*)",re.MULTILINE) + pat = re.compile(r"submitted to cluster (\d*)",re.MULTILINE) output = output.decode(errors='ignore') try: id = pat.search(output).groups()[0] @@ -1025,7 +1033,7 @@ def submit2(self, prog, argument=[], cwd=None, stdout=None, stderr=None, #Logging submit event(s). #1 job(s) submitted to cluster 2253622. output = output.decode(errors='ignore') - pat = re.compile("submitted to cluster (\d*)",re.MULTILINE) + pat = re.compile(r"submitted to cluster (\d*)",re.MULTILINE) try: id = pat.search(output).groups()[0] except: @@ -1588,7 +1596,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None output = a.communicate()[0].decode(errors='ignore') #Your job 874511 ("test.sh") has been submitted - pat = re.compile("Your job (\d*) \(",re.MULTILINE) + pat = re.compile(r"Your job (\d*) \(",re.MULTILINE) try: id = pat.search(output).groups()[0] except: @@ -1606,7 +1614,7 @@ def control_one_job(self, id): if not status: return 'F' #874516 0.00000 test.sh alwall qw 03/04/2012 22:30:35 1 - pat = re.compile("^(\d+)\s+[\d\.]+\s+[\w\d\.]+\s+[\w\d\.]+\s+(\w+)\s") + pat = re.compile(r"^(\d+)\s+[\d\.]+\s+[\w\d\.]+\s+[\w\d\.]+\s+(\w+)\s") stat = '' for line in status.stdout.read().decode(errors='ignore').split('\n'): if not line: @@ -1636,7 +1644,7 @@ def control(self, me_dir=None): cmd = 'qstat -s %s' % statusflag status = misc.Popen([cmd], shell=True, stdout=subprocess.PIPE) #874516 0.00000 test.sh alwall qw 03/04/2012 22:30:35 1 - pat = re.compile("^(\d+)") + pat = re.compile(r"^(\d+)") for line in status.stdout.read().decode(errors='ignore').split('\n'): line = line.strip() try: @@ -1715,6 +1723,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None stderr = stdout if log is None: log = '/dev/null' + command = ['sbatch', '-o', stdout, '-J', me_dir, @@ -1726,6 +1735,12 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None command.insert(1, '-p') command.insert(2, self.cluster_queue) + if 'cluster_walltime' in self.options and self.options['cluster_walltime']\ + and self.options['cluster_walltime'] != 'None': + 
command.insert(1, '-t') + command.insert(2, self.options['cluster_walltime']) + + a = misc.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, @@ -1736,7 +1751,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None id = output_arr[3].rstrip() if not id.isdigit(): - id = re.findall('Submitted batch job ([\d\.]+)', ' '.join(output_arr)) + id = re.findall(r'Submitted batch job ([\d\.]+)', ' '.join(output_arr)) if not id or len(id)>1: raise ClusterManagmentError( 'fail to submit to the cluster: \n%s' \ diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/combine_runs.py b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/combine_runs.py index 4de6b84ec0..b1e8c88eac 100755 --- a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/combine_runs.py +++ b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/combine_runs.py @@ -20,6 +20,7 @@ from __future__ import absolute_import import math import os +import shutil import re import logging from six.moves import range @@ -117,6 +118,7 @@ def sum_multichannel(self, channel): #Now read in all of the events and write them #back out with the appropriate scaled weight + to_clean = [] fsock = open(pjoin(channel, 'events.lhe'), 'w') wgt = results.axsec / results.nunwgt tot_nevents, nb_file = 0, 0 @@ -129,8 +131,14 @@ def sum_multichannel(self, channel): nw = self.copy_events(fsock, pjoin(path,'events.lhe'), wgt) tot_nevents += nw nb_file += 1 + to_clean.append(path) logger.debug("Combined %s file generating %s events for %s " , nb_file, tot_nevents, channel) - + for path in to_clean: + try: + shutil.rmtree(path) + except Exception as error: + pass + @staticmethod def get_fortran_str(nb): data = '%E' % nb @@ -162,6 +170,7 @@ def copy_events(self, fsock, input, new_wgt): fsock.write(line) old_line = line return nb_evt + def get_channels(self, proc_path): """Opens file symfact.dat to determine all channels""" sympath = os.path.join(proc_path, 'symfact.dat') diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/common_run_interface.py index 9bd9d9cb50..194f0cdfbd 100755 --- a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/common_run_interface.py @@ -23,6 +23,7 @@ import ast import logging import math +import copy import os import re import shutil @@ -181,6 +182,23 @@ def help_add_time_of_flight(self): logger.info(' threshold option allows to change the minimal value required to') logger.info(' a non zero value for the particle (default:1e-12s)') + def help_print_results(self): + logger.info("syntax: print_results [RUN_NAME] [OPTIONS]") + logger.info("-- print the results of the previous run on the screen") + logger.info(" If not RUN_NAME is provided, the information of all run") + logger.info(" are printed one after another.") + logger.info("") + logger.info(" supported options:") + logger.info(" ------------------") + logger.info(" --format=full|short # default is full") + logger.info(" full format contains banner/... 
") + logger.info(" while short is a simple multi-column format (nice for plotting)") + logger.info(" --path=") + logger.info(" allow to write the information to a file.") + logger.info(" --mode=w|a #default is w ") + logger.info(" when the information is printed to a file, you can choose ") + logger.info(" to either overwrite the file if already exists (w mode)") + logger.info(" to append the information at the end of the file (a mode)") class CheckValidForCmd(object): @@ -727,7 +745,7 @@ def __init__(self, me_dir, options, *args, **opts): if not self.proc_characteristics['ninitial']: # Get number of initial states nexternal = open(pjoin(self.me_dir,'Source','nexternal.inc')).read() - found = re.search("PARAMETER\s*\(NINCOMING=(\d)\)", nexternal) + found = re.search(r"PARAMETER\s*\(NINCOMING=(\d)\)", nexternal) self.ninitial = int(found.group(1)) else: self.ninitial = self.proc_characteristics['ninitial'] @@ -989,7 +1007,22 @@ def do_treatcards(self, line, amcatnlo=False): #raise Exception, "%s %s %s" % (sys.path, os.path.exists(pjoin(self.me_dir,'bin','internal', 'ufomodel')), os.listdir(pjoin(self.me_dir,'bin','internal', 'ufomodel'))) import ufomodel as ufomodel zero = ufomodel.parameters.ZERO - if self.proc_characteristics['nlo_mixed_expansion']: + no_width = [] + + if self.proc_characteristics['ew_sudakov']: + # if the sudakov approximation is used, force all particle widths to zero + # unless the complex mass scheme is used + if not self.proc_characteristics['complex_mass_scheme']: + no_width = [p for p in ufomodel.all_particles if p.width != zero] + logger.info('''Setting all particle widths to zero (needed for EW Sudakov approximation).''','$MG:BOLD') + # also, check that the model features the 'ntadpole' parameter, and set it to 1 + try: + param_card['tadpole'].get(1).value = 1. + logger.info('''Setting the value of ntadpole to 1 (needed for EW Sudakov approximation).''','$MG:BOLD') + except KeyError: + logger.warning('''The model has no 'ntadpole' parameter. 
The Sudakov approximation for EW corrections may give wrong results.''') + + elif self.proc_characteristics['nlo_mixed_expansion']: no_width = [p for p in ufomodel.all_particles if (str(p.pdg_code) in pids or str(-p.pdg_code) in pids) and p.width != zero] @@ -1168,17 +1201,17 @@ def detect_card_type(path): 'Begin Minpts', 'gridpack', 'ebeam1', - 'block\s+mw_run', + r'block\s+mw_run', 'BLOCK', 'DECAY', 'launch', 'madspin', - 'transfer_card\.dat', + r'transfer_card\.dat', 'set', 'main:numberofevents', # pythia8, '@MG5aMC skip_analysis', #MA5 --both-- - '@MG5aMC\s*inputs\s*=\s*\*\.(?:hepmc|lhe)', #MA5 --both-- - '@MG5aMC\s*reconstruction_name', # MA5 hadronique + r'@MG5aMC\s*inputs\s*=\s*\*\.(?:hepmc|lhe)', #MA5 --both-- + r'@MG5aMC\s*reconstruction_name', # MA5 hadronique '@MG5aMC', # MA5 hadronique 'run_rivet_later', # Rivet ] @@ -1237,7 +1270,7 @@ def detect_card_type(path): return 'madspin_card.dat' if 'decay' in text: # need to check if this a line like "decay w+" or "set decay" - if re.search("(^|;)\s*decay", fulltext, re.M): + if re.search(r"(^|;)\s*decay", fulltext, re.M): return 'madspin_card.dat' else: return 'reweight_card.dat' @@ -2074,6 +2107,12 @@ def check_multicore(self): # ensure that the run_card is present if not hasattr(self, 'run_card'): self.run_card = banner_mod.RunCard(pjoin(self.me_dir, 'Cards', 'run_card.dat')) + # Below reads the run_card in the LHE file rather than the Cards/run_card + import madgraph.various.lhe_parser as lhe_parser + args_path = list(args) + self.check_decay_events(args_path) + self.run_card = banner_mod.RunCard(lhe_parser.EventFile(args_path[0]).get_banner()['mgruncard']) + # we want to run this in a separate shell to avoid hard f2py crash command = [sys.executable] @@ -2085,6 +2124,12 @@ def check_multicore(self): command.append('--web') command.append('reweight') + ## TV: copy the event file as backup before starting reweighting + event_path = pjoin(self.me_dir, 'Events', self.run_name, 'events.lhe.gz') + event_path_backup = pjoin(self.me_dir, 'Events', self.run_name, 'events_orig.lhe.gz') + if os.path.exists(event_path) and not os.path.exists(event_path_backup): + shutil.copyfile(event_path, event_path_backup) + ######### START SINGLE CORE MODE ############ if self.options['nb_core']==1 or self.run_card['nevents'] < 101 or not check_multicore(self): if self.run_name: @@ -2200,7 +2245,7 @@ def check_multicore(self): cross_sections[key] = value / (nb_event+1) lhe.remove() for key in cross_sections: - if key == 'orig' or key.isdigit(): + if key == 'orig' or (key.isdigit() and not (key[0] == '2')): continue logger.info('%s : %s pb' % (key, cross_sections[key])) return @@ -2461,7 +2506,7 @@ def do_add_time_of_flight(self, line): ############################################################################ def do_print_results(self, line): - """Not in help:Print the cross-section/ number of events for a given run""" + """Print the cross-section/ number of events for a given run""" args = self.split_arg(line) options={'path':None, 'mode':'w', 'format':'full'} @@ -2942,10 +2987,15 @@ def do_rivet(self, line, postprocess=False): #2 Prepare Rivet setup environments rivet_path = self.options['rivet_path'] yoda_path = self.options['yoda_path'] + fastjet_path = subprocess.Popen([self.options['fastjet'], '--prefix'], + stdout = subprocess.PIPE).stdout.read().decode(errors='ignore').strip() + set_env = set_env + "export PATH={0}:$PATH\n".format(pjoin(rivet_path, 'bin')) set_env = set_env + "export PATH={0}:$PATH\n".format(pjoin(yoda_path, 'bin')) set_env = 
set_env + "export LD_LIBRARY_PATH={0}:{1}:$LD_LIBRARY_PATH\n".format(pjoin(rivet_path, 'lib'), pjoin(rivet_path, 'lib64')) set_env = set_env + "export LD_LIBRARY_PATH={0}:{1}:$LD_LIBRARY_PATH\n".format(pjoin(yoda_path, 'lib'), pjoin(yoda_path, 'lib64')) + set_env = set_env + "export LD_LIBRARY_PATH={0}:$LD_LIBRARY_PATH\n".format(pjoin(self.options['hepmc_path'], 'lib')) + set_env = set_env + "export LD_LIBRARY_PATH={0}:$LD_LIBRARY_PATH\n".format(pjoin(fastjet_path, 'lib')) major, minor = sys.version_info[0:2] set_env = set_env + "export PYTHONPATH={0}:{1}:$PYTHONPATH\n".format(pjoin(rivet_path, 'lib', 'python%s.%s' %(major,minor), 'site-packages'),\ pjoin(rivet_path, 'lib64', 'python%s.%s' %(major,minor), 'site-packages')) @@ -4311,8 +4361,8 @@ def complete_compute_widths(self, text, line, begidx, endidx, formatting=True): else: completion = {} completion['options'] = self.list_completion(text, - ['--path=', '--output=', '--min_br=0.\$', '--nlo', - '--precision_channel=0.\$', '--body_decay=']) + ['--path=', '--output=', r'--min_br=0.\$', '--nlo', + r'--precision_channel=0.\$', '--body_decay=']) return self.deal_multiple_categories(completion, formatting) @@ -4821,9 +4871,9 @@ class AskforEditCard(cmd.OneLinePathCompletion): (return False to repeat the question) """ - all_card_name = ['param_card', 'run_card', 'pythia_card', 'pythia8_card', + all_card_name = ['param_card', 'run_card', 'pythia_card', 'pythia8_card', 'fo_analysis_card' 'madweight_card', 'MadLoopParams', 'shower_card', 'rivet_card'] - to_init_card = ['param', 'run', 'madweight', 'madloop', + to_init_card = ['param', 'run', 'madweight', 'madloop', 'fo_analysis', 'shower', 'pythia8','delphes','madspin', 'rivet'] special_shortcut = {} special_shortcut_help = {} @@ -4853,6 +4903,7 @@ def load_default(self): self.has_PY8 = False self.has_delphes = False self.has_rivet = False + self.has_fo_card = False self.paths = {} self.update_block = [] @@ -5058,7 +5109,8 @@ def init_run(self, cards): try: - self.run_card = banner_mod.RunCard(self.paths['run'], consistency='warning') + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + self.run_card = banner_mod.RunCard(self.paths['run'], consistency='warning') except IOError: self.run_card = {} try: @@ -5248,6 +5300,15 @@ def init_delphes(self, cards): self.has_delphes = True return [] + def init_fo_analysis(self, cards): + self.has_fo_card = False + if not self.get_path('FO_analyse', cards): + return [] + self.has_fo_card = True + self.fo_card = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse']) + self.fo_card_def = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse_default']) + return list(self.fo_card.string_vars) + def set_CM_velocity(self, line): """compute sqrts from the velocity in the center of mass frame""" @@ -5508,6 +5569,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed['delphes_card'] = '' if self.has_rivet: allowed['rivet_card'] = '' + if self.has_fo_card: + allowed['fo_card'] = '' elif len(args) == 2: if args[1] == 'run_card': @@ -5532,6 +5595,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed = {'delphes_card':'default'} elif args[1] == 'rivet_card': allowed = {'rivet_card':'default'} + elif args[1] == 'fo_card': + allowed = {'fo_card':'default'} else: allowed = {'value':''} @@ -5539,6 +5604,7 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): start = 1 if args[1] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', 
'MadLoop_card','pythia8_card','delphes_card','plot_card', + 'fo_card', 'madanalysis5_parton_card','madanalysis5_hadron_card', 'rivet_card']: start = 2 @@ -5576,6 +5642,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): categories.append('delphes_card') if self.has_rivet: categories.append('rivet_card') + if self.has_fo_card: + categories.append('fo_card') possibilities['category of parameter (optional)'] = \ self.list_completion(text, categories) @@ -5630,7 +5698,13 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): if 'delphes_card' in allowed: if allowed['delphes_card'] == 'default': opts = ['default', 'atlas', 'cms'] - possibilities['Delphes Card'] = self.list_completion(text, opts) + possibilities['Delphes Card'] = self.list_completion(text, opts) + + if 'fo_card' in allowed: + opts = self.fo_card.string_vars + if allowed['fo_card'] == 'default': + opts.append('default') + possibilities['FO Card'] = self.list_completion(text, opts) if 'value' in list(allowed.keys()): opts = ['default', 'scale'] @@ -5733,21 +5807,28 @@ def do_set(self, line): if args[0] in self.special_shortcut: targettypes , cmd = self.special_shortcut[args[0]] if len(args) != len(targettypes) +1: - logger.warning('shortcut %s requires %s argument' % (args[0], len(targettypes))) - if len(args) < len(targettypes) +1: - return + if len(targettypes) == 1 and args[len(targettypes)].startswith('scan'): + args = args[:len(targettypes)] + [' '.join(args[len(targettypes):])] + targettypes = [str] else: - logger.warning('additional argument will be ignored') + logger.warning('shortcut %s requires %s argument' % (args[0], len(targettypes))) + if len(args) < len(targettypes) +1: + return + else: + logger.warning('additional argument will be ignored') values ={} for i, argtype in enumerate(targettypes): try: - values = {str(i): banner_mod.ConfigFile.format_variable(args[i+1], argtype, args[0])} + values[str(i)] = banner_mod.ConfigFile.format_variable(args[i+1], argtype, args[0]) except ValueError as e: logger.warning("Wrong argument: The entry #%s should be of type %s.", i+1, argtype) return except InvalidCmd as e: - logger.warning(str(e)) - return + if isinstance(args[i+1], str) and args[i+1].startswith('scan'): + values[str(i)] = args[i+1] + else: + logger.warning(str(e)) + return #else: # logger.warning("too many argument for this command") # return @@ -5787,7 +5868,7 @@ def do_set(self, line): if os.path.exists(pythia_path): logger.info('add line QCUT = %s in pythia_card.dat' % args[1]) p_card = open(pythia_path,'r').read() - p_card, n = re.subn('''^\s*QCUT\s*=\s*[\de\+\-\.]*\s*$''', + p_card, n = re.subn(r'''^\s*QCUT\s*=\s*[\de\+\-\.]*\s*$''', ''' QCUT = %s ''' % args[1], \ p_card, flags=(re.M+re.I)) if n==0: @@ -5801,7 +5882,7 @@ def do_set(self, line): if os.path.exists(pythia_path): logger.info('add line SHOWERKT = %s in pythia_card.dat' % args[1].upper()) p_card = open(pythia_path,'r').read() - p_card, n = re.subn('''^\s*SHOWERKT\s*=\s*[default\de\+\-\.]*\s*$''', + p_card, n = re.subn(r'''^\s*SHOWERKT\s*=\s*[default\de\+\-\.]*\s*$''', ''' SHOWERKT = %s ''' % args[1].upper(), \ p_card, flags=(re.M+re.I)) if n==0: @@ -5856,7 +5937,7 @@ def do_set(self, line): pjoin(self.me_dir,'Cards', 'delphes_card.dat')) return - if args[0] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', + if args[0] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', 'fo_card', 'delphes_card','madanalysis5_hadron_card','madanalysis5_parton_card','rivet_card']: if args[1] == 'default': 
@@ -6176,6 +6257,22 @@ def do_set(self, line): self.setRivet(args[start], value, default=default) self.rivet_card.write(self.paths['rivet'], self.paths['rivet_default']) + elif self.has_fo_card and (card in ['', 'fo_card'])\ + and args[start].lower() in [k.lower() for k in self.fo_card.string_vars]: + + if args[start] in self.conflict and card == '': + text = 'ambiguous name (present in more than one card). Please specify which card to edit' + logger.warning(text) + return + if args[start+1] == 'default': + value = self.fo_card_default[args[start]] + default = True + else: + value = args[start+1] + default = False + self.fo_card[args[start]] = value + self.modified_card.add('fo_card') + #INVALID -------------------------------------------------------------- else: logger.warning('invalid set command %s ' % line) @@ -6222,12 +6319,13 @@ def setM(self, block, name, value): def setR(self, name, value): - if self.mother_interface.inputfile: - self.run_card.set(name, value, user=True, raiseerror=True) - else: - self.run_card.set(name, value, user=True) - new_value = self.run_card.get(name) - logger.info('modify parameter %s of the run_card.dat to %s' % (name, new_value),'$MG:BOLD') + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + if self.mother_interface.inputfile: + self.run_card.set(name, value, user=True, raiseerror=True) + else: + self.run_card.set(name, value, user=True) + new_value = self.run_card.get(name) + logger.info('modify parameter %s of the run_card.dat to %s' % (name, new_value),'$MG:BOLD') def setML(self, name, value, default=False): @@ -6314,6 +6412,7 @@ def check_card_consistency(self): proc_charac = self.mother_interface.proc_characteristics if proc_charac['grouped_matrix'] and \ + isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int) and \ abs(self.run_card['lpp1']) == 1 == abs(self.run_card['lpp2']) and \ (self.run_card['nb_proton1'] != self.run_card['nb_proton2'] or self.run_card['nb_neutron1'] != self.run_card['nb_neutron2'] or @@ -6403,41 +6502,42 @@ def check_card_consistency(self): # check that only quark/gluon/photon are in initial beam if lpp=+-1 pdg_in_p = list(range(-6,7))+[21,22] - if (abs(self.run_card['lpp1'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial1'])) \ + if isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int): + if(abs(self.run_card['lpp1'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial1'])) \ or (abs(self.run_card['lpp2'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial2'])): - if 'pythia_card.dat' in self.cards or 'pythia8_card.dat' in self.cards: - path_to_remove = None - if 'pythia_card.dat' in self.cards: - path_to_remove = self.paths['pythia'] - card_to_remove = 'pythia_card.dat' - elif 'pythia8_card.dat' in self.cards: - path_to_remove = self.paths['pythia8'] - card_to_remove = 'pythia8_card.dat' - if path_to_remove: - if 'partonshower' in self.run_card['bypass_check']: + if 'pythia_card.dat' in self.cards or 'pythia8_card.dat' in self.cards: + path_to_remove = None + if 'pythia_card.dat' in self.cards: + path_to_remove = self.paths['pythia'] + card_to_remove = 'pythia_card.dat' + elif 'pythia8_card.dat' in self.cards: + path_to_remove = self.paths['pythia8'] + card_to_remove = 'pythia8_card.dat' + if path_to_remove: + if 'partonshower' in self.run_card['bypass_check']: + logger.warning("forcing to keep parton-shower run while possibly not fully consistent... 
please be carefull") + else: + logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.') + os.remove(path_to_remove) + self.cards.remove(card_to_remove) + else: + logger.info('Remember that Parton-Shower are not yet ready for such proton component definition (HW implementation in progress).', '$MG:BOLD' ) + elif (abs(self.run_card['lpp1'])==3 and abs(self.run_card['lpp2'])==3): + if 'pythia8_card.dat' in self.cards: + if self.run_card['pdlabel'] == 'isronlyll': + if 'partonshower' not in self.run_card['bypass_check']: + # force that QED shower is on? + for param in ['TimeShower:QEDshowerByQ', 'TimeShower:QEDshowerByL', 'TimeShower:QEDshowerByGamma', 'SpaceShower:QEDshowerByQ', 'SpaceShower:QEDshowerByL']: + if param not in self.PY8Card or \ + (not self.PY8Card[param] and param.lower() not in self.PY8Card.user_set): + logger.warning('Activating QED shower: setting %s to True', param) + self.PY8Card[param] = True + elif 'partonshower' in self.run_card['bypass_check']: logger.warning("forcing to keep parton-shower run while possibly not fully consistent... please be carefull") - else: + else: logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.') - os.remove(path_to_remove) - self.cards.remove(card_to_remove) - else: - logger.info('Remember that Parton-Shower are not yet ready for such proton component definition (HW implementation in progress).', '$MG:BOLD' ) - elif (abs(self.run_card['lpp1'])==3 and abs(self.run_card['lpp2'])==3): - if 'pythia8_card.dat' in self.cards: - if self.run_card['pdlabel'] == 'isronlyll': - if 'partonshower' not in self.run_card['bypass_check']: - # force that QED shower is on? - for param in ['TimeShower:QEDshowerByQ', 'TimeShower:QEDshowerByL', 'TimeShower:QEDshowerByGamma', 'SpaceShower:QEDshowerByQ', 'SpaceShower:QEDshowerByL']: - if param not in self.PY8Card or \ - (not self.PY8Card[param] and param.lower() not in self.PY8Card.user_set): - logger.warning('Activating QED shower: setting %s to True', param) - self.PY8Card[param] = True - elif 'partonshower' in self.run_card['bypass_check']: - logger.warning("forcing to keep parton-shower run while possibly not fully consistent... please be carefull") - else: - logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.') - os.remove(self.paths['pythia8']) - self.cards.remove('pythia8_card.dat') + os.remove(self.paths['pythia8']) + self.cards.remove('pythia8_card.dat') ######################################################################## @@ -6514,7 +6614,8 @@ def check_card_consistency(self): #check relation between lepton PDF // dressed lepton collisions // ... 
- if abs(self.run_card['lpp1']) != 1 or abs(self.run_card['lpp2']) != 1: + if isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int) and \ + abs(self.run_card['lpp1']) != 1 or abs(self.run_card['lpp2']) != 1: if abs(self.run_card['lpp1']) == abs(self.run_card['lpp2']) == 3: # this can be dressed lepton or photon-flux if proc_charac['pdg_initial1'] in [[11],[-11]] and proc_charac['pdg_initial2'] in [[11],[-11]]: @@ -6732,7 +6833,11 @@ def write_card_param(self): """ write the param_card """ self.param_card.write(self.paths['param']) - + + def write_card_fo_card(self): + """ write the fo_card""" + self.fo_card.write_card_from_template(self.paths['FO_analyse'], self.paths['FO_analyse_default']) + @staticmethod def update_dependent(mecmd, me_dir, param_card, path ,timer=0, run_card=None, lhapdfconfig=None): @@ -7076,7 +7181,7 @@ def do_decay(self, line): #first find the particle particle = line.split('>')[0].strip() logger.info("change madspin_card to define the decay of %s: %s" %(particle, line.strip()), '$MG:BOLD') - particle = particle.replace('+','\+').replace('-','\-') + particle = particle.replace('+',r'\+').replace('-',r'\-') decay_pattern = re.compile(r"^\s*decay\s+%s\s*>[\s\w+-~]*?$" % particle, re.I+re.M) text= open(path).read() text = decay_pattern.sub('', text) @@ -7193,7 +7298,7 @@ def help_edit(self, prefix=True): logger.info( ' --clean remove all previously existing line in the file') logger.info( ' --comment_line="" comment all lines matching the regular expression') logger.info('') - logger.info(' Note: all regular-expression will be prefixed by ^\s*') + logger.info(r' Note: all regular-expression will be prefixed by ^\s*') logger.info('') logger.info( ' example: edit reweight --after_line="change mode\b" change model heft') logger.info( ' edit madspin --after_line="banner" change model XXXX') @@ -7314,7 +7419,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern=r'''replace_line=(?P["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[14:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[14:-1] for posline,l in enumerate(split): if re.search(pattern, l): break @@ -7344,7 +7449,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern=r'''comment_line=(?P["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[14:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[14:-1] nb_mod = 0 for posline,l in enumerate(split): if re.search(pattern, l): @@ -7366,7 +7471,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern=r'''before_line=(?P["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[13:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[13:-1] for posline,l in enumerate(split): if re.search(pattern, l): break @@ -7383,7 +7488,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern = r'''after_line=(?P["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[12:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[12:-1] for posline,l in enumerate(split): if re.search(pattern, l): break @@ -7527,16 +7632,19 @@ def open_file(self, answer): answer = 'plot' else: answer = self.cards[int(answer)-self.integer_bias] - + path = '' if 'madweight' in answer: answer = answer.replace('madweight', 'MadWeight') elif 
'MadLoopParams' in answer: answer = self.paths['ML'] elif 'pythia8_card' in answer: answer = self.paths['pythia8'] + elif 'FO_analyse' in answer: + path = self.paths['FO_analyse'] + answer = 'fo_card' if os.path.exists(answer): path = answer - else: + elif not os.path.exists(path): if not '.dat' in answer and not '.lhco' in answer: if answer != 'trigger': path = self.paths[answer] @@ -7595,7 +7703,8 @@ def reload_card(self, path): logger.error('Please re-open the file and fix the problem.') logger.warning('using the \'set\' command without opening the file will discard all your manual change') elif path == self.paths['run']: - self.run_card = banner_mod.RunCard(path) + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + self.run_card = banner_mod.RunCard(path) elif path == self.paths['shower']: self.shower_card = shower_card_mod.ShowerCard(path) elif path == self.paths['ML']: @@ -7614,6 +7723,8 @@ def reload_card(self, path): except: import internal.madweight.Cards as mwcards self.mw_card = mwcards.Card(path) + elif path == self.paths['FO_analyse']: + self.fo_card = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse']) else: logger.debug('not keep in sync: %s', path) return path @@ -7629,6 +7740,9 @@ def scanparamcardhandling(input_path=lambda obj: pjoin(obj.me_dir, 'Cards', 'par iteratorclass=param_card_mod.ParamCardIterator, summaryorder=lambda obj: lambda:None, check_card=lambda obj: CommonRunCmd.static_check_param_card, + run_card_scan=False, + run_card_input= lambda obj: pjoin(obj.me_dir, 'Cards', 'run_card.dat'), + run_card_iteratorclass=banner_mod.RunCardIterator, ): """ This is a decorator for customizing/using scan over the param_card (or technically other) This should be use like this: @@ -7678,7 +7792,60 @@ def __enter__(self): def __exit__(self, ctype, value, traceback ): self.iterator.write(self.path) - def decorator(original_fct): + def scan_over_run_card(original_fct, obj, *args, **opts): + + if isinstance(input_path, str): + card_path = run_card_input + else: + card_path = run_card_input(obj) + + run_card_iterator = run_card_iteratorclass(card_path) + orig_card = copy.deepcopy(run_card_iterator.run_card) + if not run_card_iterator.run_card.scan_set: + return original_fct(obj, *args, **opts) + + + with restore_iterator(orig_card, card_path): + # this with statement ensure that the original card is restore + # whatever happens inside those block + + if not hasattr(obj, 'allow_notification_center'): + obj.allow_notification_center = False + with misc.TMP_variable(obj, 'allow_notification_center', False): + orig_name = get_run_name(obj) + if not orig_name and args[1]: + orig_name = args[1][0] + args = (args[0], args[1][1:]) + #orig_name = "scan_%s" % len(obj.results) + + try: + os.mkdir(pjoin(obj.me_dir, 'Events', orig_name)) + except Exception: + pass + next_name = orig_name + "_00" + + for i,card in enumerate(run_card_iterator): + card.write(card_path) + # still have to check for the auto-wdith + #if i !=0: + next_name = run_card_iterator.get_next_name(next_name) + set_run_name(obj)(next_name) + try: + original_fct(obj, *args, **opts) + except ignoreerror as error: + run_card_iterator.store_entry(next_name, {'exception': error}) + else: + run_card_iterator.store_entry(next_name, store_for_scan(obj)(), run_card_path=card_path) + + #param_card_iterator.write(card_path) #-> this is done by the with statement + name = misc.get_scan_name(orig_name, next_name) + path = result_path(obj) % name + logger.info("write scan results in %s" % path ,'$MG:BOLD') + order = 
summaryorder(obj)() + run_card_iterator.write_summary(path, order=order) + + + def decorator(original_fct): def new_fct(obj, *args, **opts): if isinstance(input_path, str): @@ -7702,8 +7869,13 @@ def new_fct(obj, *args, **opts): if not param_card_iterator: #first run of the function - original_fct(obj, *args, **opts) - return + if run_card_scan: + scan_over_run_card(original_fct, obj, *args, **opts) + return + else: + #first run of the function + original_fct(obj, *args, **opts) + return with restore_iterator(param_card_iterator, card_path): # this with statement ensure that the original card is restore diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/extended_cmd.py b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/extended_cmd.py index 2f37070580..789976beee 100755 --- a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/extended_cmd.py +++ b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/extended_cmd.py @@ -624,12 +624,12 @@ def complete(self, text, state): compfunc = self.completenames # correct wrong splittion with '\ ' - if line and begidx > 2 and line[begidx-2:begidx] == '\ ': + if line and begidx > 2 and line[begidx-2:begidx] == r'\ ': Ntext = line.split(os.path.sep)[-1] - self.completion_prefix = Ntext.rsplit('\ ', 1)[0] + '\ ' + self.completion_prefix = Ntext.rsplit(r'\ ', 1)[0] + r'\ ' to_rm = len(self.completion_prefix) - 1 Nbegidx = len(line.rsplit(os.path.sep, 1)[0]) + 1 - data = compfunc(Ntext.replace('\ ', ' '), line, Nbegidx, endidx) + data = compfunc(Ntext.replace(r'\ ', ' '), line, Nbegidx, endidx) self.completion_matches = [p[to_rm:] for p in data if len(p)>to_rm] # correct wrong splitting with '-'/"=" @@ -742,7 +742,7 @@ def path_completion(text, base_dir = None, only_dirs = False, completion += [prefix + f for f in ['.'+os.path.sep, '..'+os.path.sep] if \ f.startswith(text) and not prefix.startswith('.')] - completion = [a.replace(' ','\ ') for a in completion] + completion = [a.replace(' ',r'\ ') for a in completion] return completion @@ -1253,7 +1253,7 @@ def check_answer_in_input_file(self, question_instance, default, path=False, lin return possibility[0] if '=' in line and ' ' in line.strip(): leninit = len(line) - line,n = re.subn('\s*=\s*','=', line) + line,n = re.subn(r'\s*=\s*','=', line) if n and len(line) != leninit: return self.check_answer_in_input_file(question_instance, default, path=path, line=line) @@ -1311,7 +1311,7 @@ def nice_error_handling(self, error, line): if os.path.exists(self.debug_output): os.remove(self.debug_output) try: - super(Cmd,self).onecmd('history %s' % self.debug_output.replace(' ', '\ ')) + super(Cmd,self).onecmd('history %s' % self.debug_output.replace(' ', r'\ ')) except Exception as error: logger.error(error) @@ -2001,6 +2001,7 @@ def write_configuration(self, filepath, basefile, basedir, to_keep): text = "" has_mg5_path = False # Use local configuration => Need to update the path + already_written = set() for line in open(basefile): if '=' in line: data, value = line.split('=',1) @@ -2018,9 +2019,12 @@ def write_configuration(self, filepath, basefile, basedir, to_keep): comment = '' if key in to_keep: value = str(to_keep[key]) - else: + elif line not in already_written: + already_written.add(line) text += line continue + else: + continue if key == 'mg5_path': has_mg5_path = True try: @@ -2032,14 +2036,20 @@ def write_configuration(self, filepath, basefile, basedir, to_keep): # check if absolute path if not os.path.isabs(value): value = os.path.realpath(os.path.join(basedir, value)) - text += '%s = %s # %s \n' % (key, value, comment) 
+ new_line = '%s = %s # %s \n' % (key, value, comment) + if new_line not in already_written: + text += new_line + already_written.add(new_line) for key in to_write: if key in to_keep: - text += '%s = %s \n' % (key, to_keep[key]) + new_line = '%s = %s \n' % (key, to_keep[key]) + if new_line not in already_written: + text += new_line if not MADEVENT and not has_mg5_path: - text += """\n# MG5 MAIN DIRECTORY\n""" - text += "mg5_path = %s\n" % MG5DIR + if "mg5_path = %s\n" % MG5DIR not in already_written: + text += """\n# MG5 MAIN DIRECTORY\n""" + text += "mg5_path = %s\n" % MG5DIR writer = open(filepath,'w') writer.write(text) @@ -2190,7 +2200,7 @@ def onecmd(self, line, **opt): raise def reask(self, reprint_opt=True): - pat = re.compile('\[(\d*)s to answer\]') + pat = re.compile(r'\[(\d*)s to answer\]') prev_timer = signal.alarm(0) # avoid timer if any if prev_timer: @@ -2991,7 +3001,7 @@ def question_formatting(self, nb_col = 80, lpotential_switch=0, lnb_key=0, key=None): - """should return four lines: + r"""should return four lines: 1. The upper band (typically /========\ 2. The lower band (typically \========/ 3. The line without conflict | %(nb)2d. %(descrip)-20s %(name)5s = %(switch)-10s | @@ -3239,13 +3249,13 @@ def create_question(self, help_text=True): data_to_format['conflict_switch'] = self.color_for_value(key,self.inconsistent_keys[key], consistency=False) if hidden_line: - f2 = re.sub('%(\((?:name|descrip|add_info)\)-?)(\d+)s', + f2 = re.sub(r'%(\((?:name|descrip|add_info)\)-?)(\d+)s', lambda x: '%%%s%ds' % (x.group(1),int(x.group(2))+9), f2) text.append(f2 % data_to_format) elif hidden_line: if not f3: - f3 = re.sub('%(\((?:name|descrip|add_info)\)-?)(\d+)s', + f3 = re.sub(r'%(\((?:name|descrip|add_info)\)-?)(\d+)s', lambda x: '%%%s%ds' % (x.group(1),int(x.group(2))+9), f1) text.append(f3 % data_to_format) diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/file_writers.py b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/file_writers.py index 41bff05276..526756129f 100755 --- a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/file_writers.py +++ b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/file_writers.py @@ -36,10 +36,10 @@ class FileWriter(io.FileIO): supported_preprocessor_commands = ['if'] preprocessor_command_re=re.compile( - "\s*(?P%s)\s*\(\s*(?P.*)\s*\)\s*{\s*"\ + r"\s*(?P%s)\s*\(\s*(?P.*)\s*\)\s*{\s*"\ %('|'.join(supported_preprocessor_commands))) preprocessor_endif_re=re.compile(\ - "\s*}\s*(?Pelse)?\s*(\((?P.*)\))?\s*(?P{)?\s*") + r"\s*}\s*(?Pelse)?\s*(\((?P.*)\))?\s*(?P{)?\s*") class FileWriterError(IOError): """Exception raised if an error occurs in the definition @@ -191,15 +191,15 @@ class FortranWriterError(FileWriter.FileWriterError): pass # Parameters defining the output of the Fortran writer - keyword_pairs = {'^if.+then\s*$': ('^endif', 2), - '^type(?!\s*\()\s*.+\s*$': ('^endtype', 2), - '^do(?!\s+\d+)\s+': ('^enddo\s*$', 2), - '^subroutine': ('^end\s*$', 0), - '^module': ('^end\s*$', 0), - 'function': ('^end\s*$', 0)} - single_indents = {'^else\s*$':-2, - '^else\s*if.+then\s*$':-2} - number_re = re.compile('^(?P\d+)\s+(?P.*)') + keyword_pairs = {r'^if.+then\s*$': ('^endif', 2), + r'^type(?!\s*\()\s*.+\s*$': ('^endtype', 2), + r'^do(?!\s+\d+)\s+': (r'^enddo\s*$', 2), + '^subroutine': (r'^end\s*$', 0), + '^module': (r'^end\s*$', 0), + 'function': (r'^end\s*$', 0)} + single_indents = {r'^else\s*$':-2, + r'^else\s*if.+then\s*$':-2} + number_re = re.compile(r'^(?P\d+)\s+(?P.*)') line_cont_char = '$' comment_char = 'c' uniformcase = True #force everyting to be 
lower/upper case @@ -212,7 +212,7 @@ class FortranWriterError(FileWriter.FileWriterError): # Private variables __indent = 0 __keyword_list = [] - __comment_pattern = re.compile(r"^(\s*#|c$|(c\s+([^=]|$))|cf2py|c\-\-|c\*\*|!)", re.IGNORECASE) + __comment_pattern = re.compile(r"^(\s*#|c\$|c$|(c\s+([^=]|$))|cf2py|c\-\-|c\*\*|\s*!|!\$)", re.IGNORECASE) __continuation_line = re.compile(r"(?: )[$&]") def write_line(self, line): @@ -424,26 +424,20 @@ def count_number_of_quotes(self, line): i = i + 1 return len(splitline)-1 - def remove_routine(self, text, fct_names, formatting=True): - """write the incoming text but fully removing the associate routine/function - text can be a path to a file, an iterator, a string - fct_names should be a list of functions to remove + @staticmethod + def get_routine(text, fct_names, call_back=None): + """ + get the fortran function from a fortran file """ - f77_type = ['real*8', 'integer', 'double precision', 'logical'] - pattern = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ + pattern = re.compile(r'^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ % {'type':'|'.join(f77_type)}, re.I) - + + if isinstance(text, str): + text = text.split('\n') + + to_write=False removed = [] - if isinstance(text, str): - if '\n' in text: - text = text.split('\n') - else: - text = open(text) - if isinstance(fct_names, str): - fct_names = [fct_names] - - to_write=True for line in text: fct = pattern.findall(line) if fct: @@ -451,22 +445,38 @@ def remove_routine(self, text, fct_names, formatting=True): to_write = False else: to_write = True - if to_write: - if formatting: - if line.endswith('\n'): - line = line[:-1] - self.writelines(line) - else: - if not line.endswith('\n'): - line = '%s\n' % line - super(FileWriter,self).writelines(line) + if call_back: + call_back(line) else: removed.append(line) - + return removed + + def remove_routine(self, text, fct_names, formatting=True): + """write the incoming text but fully removing the associate routine/function + text can be a path to a file, an iterator, a string + fct_names should be a list of functions to remove + """ + + def call_back(line): + if formatting: + if line.endswith('\n'): + line = line[:-1] + self.writelines(line) + else: + if not line.endswith('\n'): + line = '%s\n' % line + super(FileWriter,self).writelines(line) + + return self.get_routine(text, fct_names, call_back) + +class FortranWriter90(FortranWriter): + + comment_char = ' !' 
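The file_writers.py change above is a pure refactor: the line-scanning loop of remove_routine moves into a reusable static get_routine, which hands every kept line to an optional call_back and returns the lines belonging to the removed routines, so remove_routine becomes a thin wrapper whose call_back writes the kept lines out. A simplified standalone sketch of the pattern (subroutines only; the typed-function part of the real regex is omitted):

import re

SUB_RE = re.compile(r'^\s+subroutine\s+([a-zA-Z]\w*)', re.I)

def get_routine(lines, fct_names, call_back=None):
    removed, to_write = [], True
    for line in lines:
        found = SUB_RE.findall(line)
        if found:
            to_write = found[0].lower() not in fct_names
        if to_write:
            if call_back:
                call_back(line)      # line survives the removal
        else:
            removed.append(line)     # line belongs to a removed routine
    return removed

kept = []
src = ['      subroutine keepme', '      x = 1',
       '      subroutine dropme', '      y = 2']
dropped = get_routine(src, ['dropme'], kept.append)
assert kept == src[:2] and dropped == src[2:]
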
+ #=============================================================================== # CPPWriter #=============================================================================== @@ -497,50 +507,50 @@ class CPPWriterError(FileWriter.FileWriterError): '^private': standard_indent, '^protected': standard_indent} - spacing_patterns = [ - ('\s*\"\s*}', '\"'), - ('\s*,\s*', ', '), - ('\s*-\s*', ' - '), - ('([{(,=])\s*-\s*', '\g<1> -'), - ('(return)\s*-\s*', '\g<1> -'), - ('\s*\+\s*', ' + '), - ('([{(,=])\s*\+\s*', '\g<1> +'), - ('\(\s*', '('), - ('\s*\)', ')'), - ('\{\s*', '{'), - ('\s*\}', '}'), - ('\s*=\s*', ' = '), - ('\s*>\s*', ' > '), - ('\s*<\s*', ' < '), - ('\s*!\s*', ' !'), - ('\s*/\s*', '/'), - ('(?\s+>\s*', ' >> '), - ('<\s*double\s*>>\s*', ' > '), - ('\s*<\s+<\s*', ' << '), - ('\s*-\s+>\s*', '->'), - ('\s*=\s+=\s*', ' == '), - ('\s*!\s+=\s*', ' != '), - ('\s*>\s+=\s*', ' >= '), - ('\s*<\s+=\s*', ' <= '), - ('\s*&&\s*', ' && '), - ('\s*\|\|\s*', ' || '), - ('\s*{\s*}', ' {}'), - ('\s*;\s*', '; '), - (';\s*\}', ';}'), - (';\s*$}', ';'), - ('\s*<\s*([a-zA-Z0-9]+?)\s*>', '<\g<1>>'), - ('^#include\s*<\s*(.*?)\s*>', '#include <\g<1>>'), - ('(\d+\.{0,1}\d*|\.\d+)\s*[eE]\s*([+-]{0,1})\s*(\d+)', - '\g<1>e\g<2>\g<3>'), - ('\s+',' '), - ('^\s*#','#')] + spacing_patterns = [(r'\s*\"\s*}', '\"'), + (r'\s*,\s*', ', '), + (r'\s*-\s*', ' - '), + (r'([{(,=])\s*-\s*', r'\g<1> -'), + (r'(return)\s*-\s*', r'\g<1> -'), + (r'\s*\+\s*', ' + '), + (r'([{(,=])\s*\+\s*', r'\g<1> +'), + (r'\(\s*', '('), + (r'\s*\)', ')'), + (r'\{\s*', '{'), + (r'\s*\}', '}'), + (r'\s*=\s*', ' = '), + (r'\s*>\s*', ' > '), + (r'\s*<\s*', ' < '), + (r'\s*!\s*', ' !'), + (r'\s*/\s*', '/'), + (r'\s*\*\s*', ' * '), + (r'\s*-\s+-\s*', '-- '), + (r'\s*\+\s+\+\s*', '++ '), + (r'\s*-\s+=\s*', ' -= '), + (r'\s*\+\s+=\s*', ' += '), + (r'\s*\*\s+=\s*', ' *= '), + (r'\s*/=\s*', ' /= '), + (r'\s*>\s+>\s*', ' >> '), + (r'<\s*double\s*>>\s*', ' > '), + (r'\s*<\s+<\s*', ' << '), + (r'\s*-\s+>\s*', '->'), + (r'\s*=\s+=\s*', ' == '), + (r'\s*!\s+=\s*', ' != '), + (r'\s*>\s+=\s*', ' >= '), + (r'\s*<\s+=\s*', ' <= '), + (r'\s*&&\s*', ' && '), + (r'\s*\|\|\s*', ' || '), + (r'\s*{\s*}', ' {}'), + (r'\s*;\s*', '; '), + (r';\s*\}', ';}'), + (r';\s*$}', ';'), + (r'\s*<\s*([a-zA-Z0-9]+?)\s*>', r'<\g<1>>'), + (r'^#include\s*<\s*(.*?)\s*>', r'#include <\g<1>>'), + (r'(\d+\.{0,1}\d*|\.\d+)\s*[eE]\s*([+-]{0,1})\s*(\d+)', + r'\g<1>e\g<2>\g<3>'), + (r'\s+',' '), + (r'^\s*#','#')] + spacing_re = dict([(key[0], re.compile(key[0])) for key in \ spacing_patterns]) diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/gen_crossxhtml.py b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/gen_crossxhtml.py index d58ec573bc..681bf9d09b 100755 --- a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/gen_crossxhtml.py +++ b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/gen_crossxhtml.py @@ -648,7 +648,7 @@ def recreate(self, banner): if run_card['ickkw'] != 0: #parse the file to have back the information pythia_log = misc.BackRead(pjoin(path, '%s_pythia.log' % tag)) - pythiare = re.compile("\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") + pythiare = re.compile(r"\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") for line in pythia_log: info = pythiare.search(line) if not info: @@ -1623,7 +1623,7 @@ def get_html(self, runresults): elif self.debug: text = str(self.debug).replace('. ','.
') if 'http' in text: - pat = re.compile('(http[\S]*)') + pat = re.compile(r'(http[\S]*)') text = pat.sub(r' here ', text) debug = '
%s
%s
' % \ (self.debug.__class__.__name__, text) diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/gen_ximprove.py b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/gen_ximprove.py index c86da36a05..415ecc9de0 100755 --- a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/gen_ximprove.py +++ b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/gen_ximprove.py @@ -158,8 +158,11 @@ def get_helicity(self, to_submit=True, clean=True): (stdout, _) = p.communicate(''.encode()) stdout = stdout.decode('ascii',errors='ignore') if stdout: - nb_channel = max([math.floor(float(d)) for d in stdout.split()]) + lines = stdout.strip().split('\n') + nb_channel = max([math.floor(float(d)) for d in lines[-1].split()]) else: + if os.path.exists(pjoin(self.me_dir, 'error')): + os.remove(pjoin(self.me_dir, 'error')) for matrix_file in misc.glob('matrix*orig.f', Pdir): files.cp(matrix_file, matrix_file.replace('orig','optim')) P_zero_result.append(Pdir) @@ -297,12 +300,14 @@ def get_helicity(self, to_submit=True, clean=True): bad_amps_perhel = [] if __debug__: mtext = open(matrix_file).read() - nb_amp = int(re.findall('PARAMETER \(NGRAPHS=(\d+)\)', mtext)[0]) - logger.debug('nb_hel: %s zero amp: %s bad_amps_hel: %s/%s', len(good_hels),len(bad_amps),len(bad_amps_perhel), len(good_hels)*nb_amp ) + nb_amp = int(re.findall(r'PARAMETER \(NGRAPHS=(\d+)\)', mtext)[0]) + logger.debug('(%s) nb_hel: %s zero amp: %s bad_amps_hel: %s/%s', split_file[-1], len(good_hels),len(bad_amps),len(bad_amps_perhel), len(good_hels)*nb_amp ) if len(good_hels) == 1: files.cp(matrix_file, matrix_file.replace('orig','optim')) continue # avoid optimization if onlye one helicity - recycler = hel_recycle.HelicityRecycler(good_hels, bad_amps, bad_amps_perhel) + + gauge = self.cmd.proc_characteristics['gauge'] + recycler = hel_recycle.HelicityRecycler(good_hels, bad_amps, bad_amps_perhel, gauge=gauge) # In case of bugs you can play around with these: recycler.hel_filt = self.run_card['hel_filtering'] recycler.amp_splt = self.run_card['hel_splitamp'] @@ -1299,7 +1304,7 @@ def get_job_for_event(self): 'script_name': 'unknown', 'directory': C.name, # need to be change for splitted job 'P_dir': C.parent_name, - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # needed for RO gridpack 'offset': 1, # need to be change for splitted job 'nevents': nevents, 'maxiter': self.max_iter, @@ -1387,7 +1392,7 @@ def create_ajob(self, template, jobs, write_dir=None): break info = jobs[j] info['script_name'] = 'ajob%i' % script_number - info['keeplog'] = 'false' + info['keeplog'] = 'false' if self.run_card['keep_log'] != 'debug' else 'true' if "base_directory" not in info: info["base_directory"] = "./" fsock.write(template_text % info) @@ -1456,7 +1461,7 @@ def get_job_for_precision(self): 'script_name': 'unknown', 'directory': C.name, # need to be change for splitted job 'P_dir': C.parent_name, - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # used for RO gridpack 'offset': 1, # need to be change for splitted job 'nevents': nevents, 'maxiter': self.max_iter, @@ -1916,7 +1921,7 @@ def get_job_for_event(self): 'directory': C.name, # need to be change for splitted job 'P_dir': os.path.basename(C.parent_name), 'offset': 1, # need to be change for splitted job - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # use for RO gridpack 
'nevents': nevents, #int(nevents*self.gen_events_security)+1, 'maxiter': self.max_iter, 'miniter': self.min_iter, diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/hel_recycle.py b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/hel_recycle.py index dfa45d5d20..6c86611f68 100755 --- a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/hel_recycle.py +++ b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/hel_recycle.py @@ -383,7 +383,7 @@ def get_number(cls, *args): class HelicityRecycler(): '''Class for recycling helicity''' - def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[]): + def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[], gauge='U'): External.good_hel = [] External.nhel_lines = '' @@ -427,6 +427,7 @@ def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[]): self.all_hel = [] self.hel_filt = True + self.gauge = gauge def set_input(self, file): if 'born_matrix' in file: @@ -612,7 +613,7 @@ def unfold_helicities(self, line, nature): def apply_amps(self, line, new_objs): if self.amp_splt: - return split_amps(line, new_objs) + return split_amps(line, new_objs, gauge=self.gauge) else: return apply_args(line, [i.args for i in new_objs]) @@ -785,7 +786,7 @@ def apply_args(old_line, all_the_args): return ''.join(new_lines) -def split_amps(line, new_amps): +def split_amps(line, new_amps, gauge): if not new_amps: return '' fct = line.split('(',1)[0].split('_0')[0] @@ -841,34 +842,31 @@ def split_amps(line, new_amps): spin = fct.split(None,1)[1][to_remove] lines.append('%sP1N_%s(%s)' % (fct, to_remove+1, ', '.join(args))) - hel, iamp = re.findall('AMP\((\d+),(\d+)\)', amp_result)[0] + hel, iamp = re.findall(r'AMP\((\d+),(\d+)\)', amp_result)[0] hel_calculated.append(hel) #lines.append(' %(result)s = TMP(3) * W(3,%(w)s) + TMP(4) * W(4,%(w)s)+' # % {'result': amp_result, 'w': windex}) #lines.append(' & TMP(5) * W(5,%(w)s)+TMP(6) * W(6,%(w)s)' # % {'result': amp_result, 'w': windex}) - if spin in "VF": - lines.append(""" call CombineAmp(%(nb)i, - & (/%(hel_list)s/), - & (/%(w_list)s/), - & TMP, W, AMP(1,%(iamp)s))""" % - {'nb': len(sub_amps), - 'hel_list': ','.join(hel_calculated), - 'w_list': ','.join(windices), - 'iamp': iamp - }) + if spin == "F" or ( spin == "V" and gauge !='FD'): + suffix = '' elif spin == "S": - lines.append(""" call CombineAmpS(%(nb)i, - &(/%(hel_list)s/), - & (/%(w_list)s/), - & TMP, W, AMP(1,%(iamp)s))""" % - {'nb': len(sub_amps), - 'hel_list': ','.join(hel_calculated), - 'w_list': ','.join(windices), - 'iamp': iamp - }) + suffix = 'S' + elif spin == "V" and gauge == "FD": + suffix = "FD" else: - raise Exception("split amp are not supported for spin2 and 3/2") + raise Exception("split amp not supported for spin2, 3/2") + + lines.append(""" call CombineAmp%(suffix)s(%(nb)i, + & (/%(hel_list)s/), + & (/%(w_list)s/), + & TMP, W, AMP(1,%(iamp)s))""" % {'suffix':suffix, + 'nb': len(sub_amps), + 'hel_list': ','.join(hel_calculated), + 'w_list': ','.join(windices), + 'iamp': iamp + }) + #lines.append('') return '\n'.join(lines) diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/histograms.py b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/histograms.py index 98f22c4c3a..9931127f66 100755 --- a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/histograms.py +++ b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/histograms.py @@ -632,34 +632,34 @@ class HwU(Histogram): # than necessary because the HwU standard allows for spaces from within # the name of a weight weight_header_re = re.compile( - '&\s*(?P(\S|(\s(?!\s*(&|$))))+)(\s(?!(&|$)))*') + 
r'&\s*(?P(\S|(\s(?!\s*(&|$))))+)(\s(?!(&|$)))*') # ================================ # Histo weight specification RE's # ================================ # The start of a plot - histo_start_re = re.compile('^\s*\s*(?P\d+)\s*"\s*'+ - '(?P(\S|(\s(?!\s*")))+)\s*"\s*$') + histo_start_re = re.compile(r'^\s*\s*(?P\d+)\s*"\s*'+ + r'(?P(\S|(\s(?!\s*")))+)\s*"\s*$') # A given weight specifier - a_float_re = '[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' - histo_bin_weight_re = re.compile('(?P%s|NaN)'%a_float_re,re.IGNORECASE) - a_int_re = '[\+|-]?\d+' + a_float_re = r'[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' + histo_bin_weight_re = re.compile(r'(?P%s|NaN)'%a_float_re,re.IGNORECASE) + a_int_re = r'[\+|-]?\d+' # The end of a plot histo_end_re = re.compile(r'^\s*<\\histogram>\s*$') # A scale type of weight - weight_label_scale = re.compile('^\s*mur\s*=\s*(?P%s)'%a_float_re+\ - '\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) - weight_label_PDF = re.compile('^\s*PDF\s*=\s*(?P\d+)\s*$') - weight_label_PDF_XML = re.compile('^\s*pdfset\s*=\s*(?P\d+)\s*$') - weight_label_TMS = re.compile('^\s*TMS\s*=\s*(?P%s)\s*$'%a_float_re) - weight_label_alpsfact = re.compile('^\s*alpsfact\s*=\s*(?P%s)\s*$'%a_float_re, + weight_label_scale = re.compile(r'^\s*mur\s*=\s*(?P%s)'%a_float_re+\ + r'\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) + weight_label_PDF = re.compile(r'^\s*PDF\s*=\s*(?P\d+)\s*$') + weight_label_PDF_XML = re.compile(r'^\s*pdfset\s*=\s*(?P\d+)\s*$') + weight_label_TMS = re.compile(r'^\s*TMS\s*=\s*(?P%s)\s*$'%a_float_re) + weight_label_alpsfact = re.compile(r'^\s*alpsfact\s*=\s*(?P%s)\s*$'%a_float_re, re.IGNORECASE) - weight_label_scale_adv = re.compile('^\s*dyn\s*=\s*(?P%s)'%a_int_re+\ - '\s*mur\s*=\s*(?P%s)'%a_float_re+\ - '\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) - weight_label_PDF_adv = re.compile('^\s*PDF\s*=\s*(?P\d+)\s+(?P\S+)\s*$') + weight_label_scale_adv = re.compile(r'^\s*dyn\s*=\s*(?P%s)'%a_int_re+\ + r'\s*mur\s*=\s*(?P%s)'%a_float_re+\ + r'\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) + weight_label_PDF_adv = re.compile(r'^\s*PDF\s*=\s*(?P\d+)\s+(?P\S+)\s*$') class ParseError(MadGraph5Error): @@ -926,7 +926,7 @@ def get_HwU_source(self, print_header=True): res.append(' '.join('%+16.7e'%wgt for wgt in list(bin.boundaries))) res[-1] += ' '.join('%+16.7e'%bin.wgts[key] for key in self.bins.weight_labels if key not in ['central','stat_error']) - res.append('<\histogram>') + res.append(r'<\histogram>') return res def output(self, path=None, format='HwU', print_header=True): @@ -1149,6 +1149,8 @@ def parse_one_histo_from_stream(self, stream, all_weight_header, boundaries = [0.0,0.0] for j, weight in \ enumerate(HwU.histo_bin_weight_re.finditer(line_bin)): + if (j == len(weight_header)): + continue if j == len(all_weight_header): raise HwU.ParseError("There is more bin weights"+\ " specified than expected (%i)"%len(weight_header)) @@ -1803,7 +1805,7 @@ def parse_histos_from_PY8_XML_stream(self, stream, run_id=None, # Filter empty weights coming from the split weight_label_list = [wgt.strip() for wgt in str(selected_run_node.getAttribute('header')).split(';') if - not re.match('^\s*$',wgt)] + not re.match(r'^\s*$',wgt)] ordered_weight_label_list = [w for w in weight_label_list if w not\ in ['xmin','xmax']] # Remove potential repetition of identical weight labels @@ -1827,7 +1829,7 @@ def parse_histos_from_PY8_XML_stream(self, stream, run_id=None, all_weights = [] for wgt_position, wgt_label in \ enumerate(str(selected_run_node.getAttribute('header')).split(';')): - if not 
re.match('^\s*$',wgt_label) is None: + if not re.match(r'^\s*$',wgt_label) is None: continue all_weights.append({'POSITION':wgt_position}) for wgt_item in wgt_label.strip().split('_'): @@ -2714,7 +2716,7 @@ def ratio_no_correlations(wgtsA, wgtsB): # First the global gnuplot header for this histogram group global_header =\ -""" +r""" ################################################################################ ### Rendering of the plot titled '%(title)s' ################################################################################ @@ -2862,9 +2864,9 @@ def ratio_no_correlations(wgtsA, wgtsB): major_title = ', '.join(major_title) if not mu[0] in ['none',None]: - major_title += ', dynamical\_scale\_choice=%s'%mu[0] + major_title += r', dynamical\_scale\_choice=%s'%mu[0] if not pdf[0] in ['none',None]: - major_title += ', PDF=%s'%pdf[0].replace('_','\_') + major_title += ', PDF=%s'%pdf[0].replace('_',r'\_') # Do not show uncertainties for individual jet samples (unless first # or specified explicitely and uniquely) @@ -2937,7 +2939,7 @@ def ratio_no_correlations(wgtsA, wgtsB): plot_lines.append( "'%s' index %d using (($1+$2)/2):%d ls %d title '%s'"\ %(HwU_name,block_position+i,mu_var+3,color_index,\ -'%s dynamical\_scale\_choice=%s' % (title,mu[j]))) +r'%s dynamical\_scale\_choice=%s' % (title,mu[j]))) # And now PDF_variation if available if not PDF_var_pos is None: for j,PDF_var in enumerate(PDF_var_pos): @@ -2947,7 +2949,7 @@ def ratio_no_correlations(wgtsA, wgtsB): plot_lines.append( "'%s' index %d using (($1+$2)/2):%d ls %d title '%s'"\ %(HwU_name,block_position+i,PDF_var+3,color_index,\ -'%s PDF=%s' % (title,pdf[j].replace('_','\_')))) +'%s PDF=%s' % (title,pdf[j].replace('_',r'\_')))) # Now add the uncertainty lines, those not using a band so that they # are not covered by those using a band after we reverse plo_lines diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/launch_plugin.py index 7b10bedcef..9ec09eb71d 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/launch_plugin.py @@ -98,6 +98,7 @@ def default_setup(self): self['vector_size'] = 16 # already setup in default class (just change value) self['aloha_flag'] = '--fast-math' self['matrix_flag'] = '-O3' + self['limhel'] = 0 self.display_block.append('simd') self.display_block.append('psoptim') diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/lhe_parser.py b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/lhe_parser.py index eee9ba4522..f6e47956cd 100755 --- a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/lhe_parser.py +++ b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/lhe_parser.py @@ -7,6 +7,7 @@ import numbers import math import time +import copy import os import shutil import sys @@ -101,7 +102,7 @@ def __init__(self, line=None, event=None): self.rwgt = 0 return - + self.event = event if event is not None: self.event_id = len(event) #not yet in the event @@ -1066,6 +1067,8 @@ def define_init_banner(self, wgt, lha_strategy, proc_charac=None): #special case for 1>N init_information = run_card.get_banner_init_information() event = next(self) + if not len(event): #if parse-momenta was false we have to parse the first event + event = Event(str(event)) init_information["idbmup1"] = event[0].pdg init_information["ebmup1"] = event[0].mass init_information["idbmup2"] = 0 @@ -1149,6 +1152,7 @@ def initialize_unweighting(self, getwgt, trunc_error): nb_keep = max(20, 
int(nb_event*trunc_error*15)) new_wgt = new_wgt[-nb_keep:] if nb_event == 0: + misc.sprint(i,f) raise Exception # store the information self.initial_nb_events[i] = nb_event @@ -1203,7 +1207,6 @@ def unweight(self, outputpath, get_wgt, **opts): (stop to write event when target is reached) """ - if isinstance(get_wgt, (str,six.text_type)): unwgt_name =get_wgt def get_wgt_multi(event): @@ -1483,12 +1486,12 @@ def reorder_mother_child(self): particle.mother2 -= 1 # re-call the function for the next potential change return self.reorder_mother_child() - - - - - + + + + + def parse_reweight(self): """Parse the re-weight information in order to return a dictionary {key: value}. If no group is define group should be '' """ @@ -1522,6 +1525,37 @@ def parse_nlo_weight(self, real_type=(1,11), threshold=None): threshold=threshold) return self.nloweight + def get_fks_pair(self, real_type=(1,11), threshold=None): + """ Gives the fks pair labels""" + start, stop = self.tag.find(''), self.tag.find('') + if start != -1 != stop: + text = self.tag[start+8:stop] + all_line = text.split('\n') + text = text.lower().replace('d','e') + all_line = text.split('\n') + for line in all_line: + data = line.split() + if len(data)>16: + wgt = OneNLOWeight(line, real_type=real_type) + return wgt.to_merge_pdg,wgt.nexternal + + def get_born_momenta(self,real_type=(1,11), threshold=None): + """ Gets the underlying n+1 body kinematics""" + start, stop = self.tag.find(''), self.tag.find('') + if start != -1 != stop: + text = self.tag[start+8:stop] + text = text.lower().replace('d','e') + all_line = text.split('\n') + for line in all_line: + data = line.split() + if len(data)>16: + wgt = OneNLOWeight(line, real_type=real_type) + nexternal = wgt.nexternal + real_momenta = all_line[2:2+nexternal] + return real_momenta + + + def rewrite_nlo_weight(self, wgt=None): """get the string associate to the weight""" @@ -1558,11 +1592,11 @@ def parse_lo_weight(self): return self.loweight if not hasattr(Event, 'loweight_pattern'): - Event.loweight_pattern = re.compile('''\s*(?P\d+)\s+(?P[\d.e+-]+)\s*\s*\n\s* - \s*(?P[\s\d.+-e]+)\s*\s*\n\s* - 1|2)["']?\>\s*(?P[\s\d.e+-]*)\s*\s*\n\s* - 1|2)["']?\>\s*(?P[\s\d.e+-]*)\s*\s*\n\s* - \s*(?P[\d.e+-]*)\s* + Event.loweight_pattern = re.compile('''\\s*(?P\\d+)\\s+(?P[\\d.e+-]+)\\s*\\s*\n\\s* + \\s*(?P[\\s\\d.+-e]+)\\s*\\s*\n\\s* + 1|2)["']?\\>\\s*(?P[\\s\\d.e+-]*)\\s*\\s*\n\\s* + 1|2)["']?\\>\\s*(?P[\\s\\d.e+-]*)\\s*\\s*\n\\s* + \\s*(?P[\\d.e+-]*)\\s* ''',re.X+re.I+re.M) start, stop = self.tag.find(''), self.tag.find('') @@ -1615,7 +1649,7 @@ def parse_matching_scale(self): self.matched_scale_data = [] - pattern = re.compile("") + pattern = re.compile(r"") data = re.split(pattern,self.tag) if len(data) == 1: return [] @@ -1623,7 +1657,7 @@ def parse_matching_scale(self): tmp = {} start,content, end = data self.tag = "%s%s" % (start, end) - pattern = re.compile("pt_clust_(\d*)=\"([\de+-.]*)\"") + pattern = re.compile("pt_clust_(\\d*)=\"([\\de+-.]*)\"") for id,value in pattern.findall(content): tmp[int(id)] = float(value) for i in range(1, len(self)+1): @@ -1647,7 +1681,7 @@ def parse_syscalc_info(self): return self.syscalc_data pattern = re.compile("|") - pattern2 = re.compile("<(?P[\w]*)(?:\s*(\w*)=[\"'](.*)[\"']\s*|\s*)>(.*)") + pattern2 = re.compile("<(?P[\\w]*)(?:\\s*(\\w*)=[\"'](.*)[\"']\\s*|\\s*)>(.*)") data = re.split(pattern,self.tag) if len(data) == 1: return [] @@ -1850,6 +1884,240 @@ def get_decay(self, pdg_code=0, event_id=None): return new_event + + def set_initial_mass_to_zero(self): + 
"""set the masses of the initial particles to zero, by reshuffling the respective momenta + Works only in the **partonic** com frame, so the event must be boosted to such frame + before calling the function + """ + + if not misc.equal(self[0].px, 0) or not misc.equal(self[1].px, 0) or \ + not misc.equal(self[0].py, 0) or not misc.equal(self[1].py, 0) or \ + not misc.equal(self[0].pz, - self[1].pz, zero_limit=False): + misc.sprint(self[0]) + misc.sprint(self[1]) + raise Exception('momenta should be in the partonic center of mass frame') + + self[0].mass = 0. + self[1].mass = 0. + tot_E=0. + for ip,part in enumerate(self): + if part.status == 1 : + tot_E += part.E + if (self[0].pz > 0. and self[1].pz < 0): + self[0].set_momentum(FourMomentum([tot_E/2., 0., 0., tot_E/2.])) + self[1].set_momentum(FourMomentum([tot_E/2., 0., 0., -tot_E/2.])) + elif (self[0].pz < 0. and self[1].pz > 0): + self[0].set_momentum(FourMomentum([tot_E/2., 0., 0., -tot_E/2.])) + self[1].set_momentum(FourMomentum([tot_E/2., 0., 0., tot_E/2.])) + else: + logger.critical('ERROR: two incoming partons not back.-to-back') + + def set_final_jet_mass_to_zero(self): + """set the final light particle masses to zero + """ + + for ip,part in enumerate(self): + if ((abs(part.pid) <= 5) or (abs(part.pid) == 11) or (abs(part.pid) == 12)) and (part.status == 1): + part.mass = 0. + E_1_new = math.sqrt(part.mass**2 + part.px**2 + part.py**2 + part.pz**2) + part.set_momentum(FourMomentum([E_1_new, part.px, part.py, part.pz])) + + + + def merge_particles_kinematics(self, i,j, moth): + """Map to an underlying n-body kinematics for two given + particles i,j to be merged and a resulting moth""" + """ note! kinematics (and id) mapping only! """ + + recoil = True + fks_type = False + + if recoil and not fks_type: + if (i == moth[0].get('number')-1): + fks_i = i + fks_j = j + elif (j == moth[0].get('number')-1): + fks_i = j + fks_j = i + to_remove = fks_j + + merge_i = self[fks_i] + merge_j = self[fks_j] + + i_4mom = FourMomentum(merge_i) + j_4mom = FourMomentum(merge_j) + if (fks_i <= 1): + sign1 = -1.0 + else: + sign1 = 1.0 + mother_4mom = i_4mom + sign1*j_4mom + + new_event = copy.deepcopy(self) + + self[fks_i].pid = moth[0]['id'] + self[fks_i].set_momentum(mother_4mom) + + if fks_i <= 1: # initial-state recoil + new_p = FourMomentum() + for ip,part in enumerate(self): + if (ip != fks_i and ip != fks_j and ip >= 2): + new_p += part + + if fks_i == 0: + self[1].set_momentum(new_p - FourMomentum(self[0])) + elif fks_i == 1: + self[0].set_momentum(new_p - FourMomentum(self[1])) + + pz_1_new = self.recoil_eq(self[0],self[1]) + pz_2_new = self[0].pz + self[1].pz - pz_1_new + E_1_new = math.sqrt(self[0].mass**2 + self[0].px**2 + self[0].py**2 + pz_1_new **2) + E_2_new = math.sqrt(self[1].mass**2 + self[1].px**2 + self[1].py**2 + pz_2_new **2) + self[0].set_momentum(FourMomentum([E_1_new,self[0].px,self[0].py,pz_1_new])) + self[1].set_momentum(FourMomentum([E_2_new,self[1].px,self[1].py,pz_2_new])) + self.pop(to_remove) + + if fks_i > 1: # final-state recoil + + # Re-scale the energy of fks_i to make it on-shell + for ip,part in enumerate(self): + if (ip == fks_i): + part.E = math.sqrt(part.mass**2 + part.px**2 + part.py**2 + part.pz**2) + new_p.E = part.E + + # Find the overall energy in the final state + new_p.E = 0.0 + for ip,part in enumerate(self): + if (ip != fks_j and ip >= 2): + new_p.E += part.E + + # Use one of the initial states to absorb the energy change in the final state + 
self[1].set_momentum(FourMomentum([new_p.E-self[0].E,self[1].px,self[1].py,self[1].pz])) + + # Change the initial state pz and E + pz_1_new = self.recoil_eq(self[0],self[1]) + pz_2_new = self[0].pz + self[1].pz - pz_1_new + E_1_new = math.sqrt(self[0].mass**2 + self[0].px**2 + self[0].py**2 + pz_1_new **2) + E_2_new = math.sqrt(self[1].mass**2 + self[1].px**2 + self[1].py**2 + pz_2_new **2) + self[0].set_momentum(FourMomentum([E_1_new,self[0].px,self[0].py,pz_1_new])) + self[1].set_momentum(FourMomentum([E_2_new,self[1].px,self[1].py,pz_2_new])) + self.pop(to_remove) + + elif fks_type and not recoil: + ## Do it in a more FKS-style + if (i == moth[0].get('number')-1): + fks_i = i + fks_j = j + elif (j == moth[0].get('number')-1): + fks_i = j + fks_j = i + to_remove = fks_j + new_event = copy.copy(event) + + if fks_i <= 1: # initial-state recoil + + # First boost to partonic CM frame + q = FourMomentum(self[0])+FourMomentum(self[1]) + for ip,part in enumerate(self): + vec = FourMomentum(part) + self[ip].set_momentum(vec.zboost(pboost=q)) + + k_tot = FourMomentum([self[0].E+self[1].E-self[fks_j].E,self[0].px+self[1].px-self[fks_j].px,\ + self[0].py+self[1].py-self[fks_j].py,self[0].pz+self[1].pz-self[fks_j].pz]) + + final = FourMomentum([0,0,0,0]) + for ip,part in enumerate(self): + vec = FourMomentum([part.E,part.px,part.py,part.pz]) + if (ip != fks_i and ip != fks_j and ip >= 2): + final = final + vec + + s = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz])**2 + ksi = self[fks_j].E/(math.sqrt(s)/2.0) + y = self[fks_j].pz/self[fks_j].E + + self[0].pz = self[0].pz * math.sqrt(1.0-ksi)*math.sqrt((2.0-ksi*(1.0+y))/((2.0-ksi*(1.0-y)))) + self[0].E = math.sqrt(self[0].mass**2 + self[0].pz**2) + self[1].pz = self[1].pz * math.sqrt(1.0-ksi)*math.sqrt((2.0-ksi*(1.0-y))/((2.0-ksi*(1.0+y)))) + self[1].E = math.sqrt(self[1].mass**2 + self[1].pz**2) + + final = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz]) + + k_tot_1 = k_tot.zboost(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + k_tot_2 = k_tot_1.pt_boost(pboost=FourMomentum([k_tot_1.E,k_tot_1.px,k_tot_1.py,k_tot_1.pz])) + k_tot_3 = k_tot_2.zboost_inv(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + + for ip,part in enumerate(self): + if (ip >= 2): + vec = FourMomentum([part.E,part.px,part.py,part.pz]) + vec2 = vec.zboost(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + vec3 = vec2.pt_boost(pboost=FourMomentum([k_tot_1.E,k_tot_1.px,k_tot_1.py,k_tot_1.pz])) + vec_new = vec3.zboost_inv(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + self[ip].set_momentum(FourMomentum([vec_new.E,vec_new.px,vec_new.py,vec_new.pz])) + + self.pop(to_remove) + + else: # final-state recoil + q = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz]) + + for ip,part in enumerate(self): + vec = FourMomentum([part.E,part.px,part.py,part.pz]) + self[ip].set_momentum(vec.zboost(pboost=q)) + + q = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz]) + + k = FourMomentum([self[fks_i].E+self[fks_j].E,self[fks_i].px+self[fks_j].px,\ + self[fks_i].py+self[fks_j].py,self[fks_i].pz+self[fks_j].pz]) + + k_rec = FourMomentum([0,0,0,0]) + for ip,part in enumerate(self): + if ip >= 2 and ip != fks_i and ip != fks_j: # add only final-states to the recoil and not the FKS pair + k_rec = k_rec + 
FourMomentum([part.E,part.px,part.py,part.pz]) + + k_mom = math.sqrt(k_rec.px**2 + k_rec.py**2 + k_rec.pz**2) + beta = (q**2 - (k_rec.E+k_mom)**2)/(q**2 + (k_rec.E+k_mom)**2) + for ip,part in enumerate(self): + if ip >= 2 and ip != fks_i and ip != fks_j: + vec = FourMomentum([self[ip].E,self[ip].px,self[ip].py,self[ip].pz]) + self[ip].set_momentum(vec.boost_beta(beta,k_rec)) + if ip == fks_i: + self[ip].set_momentum(q - k_rec.boost_beta(beta,k_rec)) + self.pop(to_remove) + else: + logger.info('Error in Sudakov Born mapping: no recoil scheme found!') + + def recoil_eq(self,part1, part2): + """ In general, solves the equation + E1 + E2 = K + p1 + p2 = c + E1^2 - p1^2 = a + E2^2 - p2^2 = b + and returns p1 + """ + thresh = 1e-6 + import random + a = part1.mass**2 + part1.px**2 + part1.py**2 + b = part2.mass**2 + part2.px**2 + part2.py**2 + c = part1.pz + part2.pz + K = part1.E + part2.E + K2 = K**2 + sol1 = (-a*c + b*c + c**3 - c*K2 - math.sqrt(K2*(a**2 + (b + c**2 - K2)**2 - 2*a*(b - c**2 + K2))))/(2*(c**2-K2)) + sol2 = (-a*c + b*c + c**3 - c*K2 + math.sqrt(K2*(a**2 + (b + c**2 - K2)**2 - 2*a*(b - c**2 + K2))))/(2*(c**2-K2)) + + if abs(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2) - (math.sqrt(a+sol2**2) + math.sqrt(b+(c-sol2)**2))) > thresh: + logger.critical('Error in recoil_eq solver 1') + logger.critical(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2)) + logger.critical(math.sqrt(a+sol2**2) + math.sqrt(b+(c-sol2)**2)) + if abs(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2) - K) > thresh: + logger.critical('Error in recoil_eq solver 2') + logger.critical(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2)) + logger.critical(K) + return sol1 + + def boost(self, filter=None): """modify the current event to boost it according to the current filter""" if filter is None: @@ -1861,7 +2129,7 @@ def boost(self, filter=None): if list(filter(p)): pboost += p else: - pboost = FourMomentum(pboost) + pboost = FourMomentum(filter) # change sign of three-component due to helas convention pboost.px *=-1 @@ -1877,7 +2145,7 @@ def check(self): """check various property of the events""" # check that relative error is under control - threshold = 1e-6 + threshold = 1e-4 #1. Check that the 4-momenta are conserved E, px, py, pz = 0,0,0,0 @@ -1920,7 +2188,50 @@ def check(self): self.check_color_structure() #3. check mass - + + def check_kinematics_only(self): + """check various property of the events - only kinematics""" + + # check that relative error is under control + threshold = 1e-3 + + #1. 
Check that the 4-momenta are conserved + E, px, py, pz = 0,0,0,0 + absE, abspx, abspy, abspz = 0,0,0,0 + for particle in self: + coeff = 1 + if particle.status == -1: + coeff = -1 + elif particle.status != 1: + continue + E += coeff * particle.E + absE += abs(particle.E) + px += coeff * particle.px + py += coeff * particle.py + pz += coeff * particle.pz + abspx += abs(particle.px) + abspy += abs(particle.py) + abspz += abs(particle.pz) + # check mass + fourmass = FourMomentum(particle).mass + + if particle.mass and (abs(particle.mass) - fourmass)/ abs(particle.mass) > threshold: + logger.critical(self) + raise Exception( "Do not have correct mass lhe: %s momentum: %s (error at %s" % (particle.mass, fourmass, (abs(particle.mass) - fourmass)/ abs(particle.mass))) + + if abs(E/absE) > threshold: + logger.critical(self) + raise Exception("Do not conserve Energy %s, %s" % (E/absE, E)) + if abs(px/abspx) > threshold: + logger.critical(self) + raise Exception("Do not conserve Px %s, %s" % (px/abspx, px)) + if abs(py/abspy) > threshold: + logger.critical(self) + raise Exception("Do not conserve Py %s, %s" % (py/abspy, py)) + if abs(pz/abspz) > threshold: + logger.critical(self) + raise Exception("Do not conserve Pz %s, %s" % (pz/abspz, pz)) + def assign_scale_line(self, line, convert=True): """read the line corresponding to global event line @@ -2764,7 +3075,7 @@ def zboost(self, pboost=None, E=0, pz=0): if isinstance(pboost, FourMomentum): E = pboost.E pz = pboost.pz - + #beta = pz/E gamma = E / math.sqrt(E**2-pz**2) gammabeta = pz / math.sqrt(E**2-pz**2) @@ -2778,6 +3089,74 @@ def zboost(self, pboost=None, E=0, pz=0): out.pz = 0 return out + def zboost_inv(self, pboost=None, E=0, pz=0): + """Both momenta should be in the same frame. + The boost perform correspond to the boost required to set pboost at + rest (only z boost applied). + """ + if isinstance(pboost, FourMomentum): + E = pboost.E + pz = pboost.pz + + #beta = pz/E + gamma = E / math.sqrt(E**2-pz**2) + gammabeta = pz / math.sqrt(E**2-pz**2) + + out = FourMomentum([gamma*self.E + gammabeta*self.pz, + self.px, + self.py, + gamma*self.pz + gammabeta*self.E]) + + if abs(out.pz) < 1e-6 * out.E: + out.pz = 0 + return out + + + def pt_boost(self, pboost=None, E=0, pz=0): + """Both momenta should be in the same frame. + The boost perform correspond to the boost required to set pboost at + rest (only pT boost applied). 
+ """ + + if isinstance(pboost, FourMomentum): + E = pboost.E + px = pboost.px + py = pboost.py + mass = math.sqrt(E**2 - px**2 - py**2) + + betax = px/E + betay = py/E + beta = math.sqrt(betax**2+betay**2) + gamma = 1 / math.sqrt(1.0-beta**2) + + out = FourMomentum([gamma*self.E - gamma*betax*self.px - gamma*betay*self.py, + -gamma*betax*self.E + (1.0 + (gamma-1.0)*betax**2/(beta**2))*self.px + (gamma-1.0)*betax*betay/(beta**2)*self.py, + -gamma*betay*self.E + ((gamma-1.0)*betax*betay/(beta**2))*self.px + (1.0+(gamma-1.0)*(betay**2)/(beta**2))*self.py, + self.pz]) + + if abs(out.px) < 1e-6 * out.E: + out.px = 0 + if abs(out.py) < 1e-6 * out.E: + out.py = 0 + return out + + def boost_beta(self,beta,mom): + """ Boost along the three-momentum of mom with a boost of size beta""" + + unit = mom * (1.0/math.sqrt(mom.px**2+mom.py**2+mom.pz**2)) + beta_vec = beta*unit + bx = beta_vec.px + by = beta_vec.py + bz = beta_vec.pz + gamma = 1.0 / math.sqrt(1.0-beta**2) + + out = FourMomentum([gamma*self.E - gamma*bx*self.px - gamma*by*self.py - gamma*bz*self.pz, + -gamma*bx*self.E + (1.0 + (gamma-1.0)*bx**2/(beta**2))*self.px + (gamma-1.0)*bx*by/(beta**2)*self.py + (gamma-1.0)*bx*bz/(beta**2)*self.pz, + -gamma*by*self.E + ((gamma-1.0)*bx*by/(beta**2))*self.px + (1.0+(gamma-1.0)*(by**2)/(beta**2))*self.py + (gamma-1.0)*by*bz/(beta**2)*self.pz, + -gamma*bz*self.E + (gamma-1.0)*bx*bz/(beta**2)*self.px + (gamma-1.0)*(by*bz)/(beta**2)*self.py + (1.0+(gamma-1.0)*bz**2/(beta**2))*self.pz]) + + return out + def boost_to_restframe(self, pboost): """apply the boost transformation such that pboost is at rest in the new frame. First apply a rotation to allign the pboost to the z axis and then use @@ -2789,27 +3168,64 @@ def boost_to_restframe(self, pboost): return out - # write pboost as (E, p cosT sinF, p sinT sinF, p cosF) - # rotation such that it become (E, 0 , 0 , p ) is - # cosT sinF , -sinT , cosT sinF - # sinT cosF , cosT , sinT sinF - # -sinT , 0 , cosF - p = math.sqrt( pboost.px**2 + pboost.py**2+ pboost.pz**2) - cosF = pboost.pz / p - sinF = math.sqrt(1-cosF**2) - sinT = pboost.py/p/sinF - cosT = pboost.px/p/sinF - - out=FourMomentum([self.E, - self.px*cosT*cosF + self.py*sinT*cosF-self.pz*sinF, - -self.px*sinT+ self.py*cosT, - self.px*cosT*sinF + self.py*sinT*sinF + self.pz*cosF - ]) - out = out.zboost(E=pboost.E,pz=p) + # see here https://physics.stackexchange.com/questions/749036/general-lorentz-boost-of-four-momentum-in-cm-frame-particle-physics + vx = pboost.px/pboost.E + vy = pboost.py/pboost.E + vz = pboost.pz/pboost.E + v = pboost.norm/pboost.E + v2 = pboost.norm_sq/pboost.E**2 + gamma = 1./math.sqrt(1.-v**2) + gammo = gamma-1. 
+        out = FourMomentum(E = gamma*(self.E - vx*self.px - vy*self.py - vz*self.pz),
+                           px= -gamma*vx*self.E + (1+gammo*vx**2/v2)*self.px + gammo*vx*vy/v2*self.py + gammo*vx*vz/v2*self.pz,
+                           py= -gamma*vy*self.E + gammo*vy*vx/v2*self.px + (1+gammo*vy**2/v2)*self.py + gammo*vy*vz/v2*self.pz,
+                           pz= -gamma*vz*self.E + gammo*vz*vx/v2*self.px + gammo*vz*vy/v2*self.py + (1+gammo*vz**2/v2)*self.pz)
+        return out
+
+    def rotate_to_z(self, prot):
+
+        import math
+        import numpy as np
+
+        z = np.array([0., 0., 1.])
+
+        px = self.px
+        py = self.py
+        pz = self.pz
+
+        refx = prot.px
+        refy = prot.py
+        refz = prot.pz
+
+        prot_mom = np.array([px, py, pz])
+        ref_mom = np.array([refx, refy, refz])
+
+        # Create normal vector
+        n = np.array([refy, -refx, 0.])
+        n = n * 1./math.sqrt(self.threedot(n,n))
+        t = prot_mom - self.threedot(n,prot_mom)*n
+        p = ref_mom - self.threedot(ref_mom,z)*z
+        p = p/math.sqrt(self.threedot(p,p))
+
+        t_pz = np.array([self.threedot(t,p), self.threedot(t,z), 0.])
+        costheta = self.threedot(ref_mom,z)* 1./math.sqrt(self.threedot(ref_mom, ref_mom))
+        sintheta = math.sqrt(1.-costheta**2)
+
+        sgn = 1.
+        t_pz_p = np.array([0., 0., 0.])
+        t_pz_p[0] = costheta*t_pz[0] + sgn*(-sintheta) * t_pz[1]
+        t_pz_p[1] = sgn*sintheta*t_pz[0] + costheta * t_pz[1]
+
+        out_mom = self.threedot(n,prot_mom)*n + t_pz_p[0]*p + t_pz_p[1]*z
+
+        out = FourMomentum([self.E, out_mom[0], out_mom[1], out_mom[2]])
+
+        return out
-
+    def threedot(self, a, b):
+
+        return a[0]*b[0]+a[1]*b[1]+a[2]*b[2]

 class OneNLOWeight(object):
diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/madevent_interface.py b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/madevent_interface.py
index 2a118e21bf..8e30cf690c 100755
--- a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/madevent_interface.py
+++ b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/madevent_interface.py
@@ -496,7 +496,6 @@ def help_remove(self):
         logger.info("    the optional '-f' allows to bypass all security questions")
         logger.info("    The banner can be removed only if all files are removed first.")
-
 class AskRun(cmd.ControlSwitch):
     """a class for the question on what to do on a madevent run"""
@@ -2393,13 +2392,17 @@ def do_generate_events(self, line):
         # Check argument's validity
         mode = self.check_generate_events(args)
         switch_mode = self.ask_run_configuration(mode, args)
-        if not args:
-            # No run name assigned -> assign one automatically
-            self.set_run_name(self.find_available_run_name(self.me_dir), None, 'parton')
-        else:
-            self.set_run_name(args[0], None, 'parton', True)
-            args.pop(0)
-
+        with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True):
+            run_card = banner_mod.RunCard(pjoin(self.me_dir, 'Cards', 'run_card.dat'), consistency=False)
+        if not run_card.scan_set:
+            if not args:
+                # No run name assigned -> assign one automatically
+                self.set_run_name(self.find_available_run_name(self.me_dir), None, 'parton')
+            else:
+                self.set_run_name(args[0], None, 'parton', True)
+                args.pop(0)
+
+
         self.run_generate_events(switch_mode, args)
         self.postprocessing()
@@ -2560,7 +2563,7 @@ def wait_monitoring(Idle, Running, Done):
         self.update_status("postprocessing contur done", level="rivet")
     # this decorator handles the loop related to scan.
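    # (presumably run_card_scan=True makes the decorator also loop over the
    # scan points declared in the run_card itself, consistent with the
    # allow_scan/scan_set handling added to do_generate_events above, which
    # skips the up-front run-name assignment when the run_card defines a scan)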
- @common_run.scanparamcardhandling() + @common_run.scanparamcardhandling(run_card_scan=True) def run_generate_events(self, switch_mode, args): if self.proc_characteristics['loop_induced'] and self.options['run_mode']==0: @@ -2593,7 +2596,6 @@ def run_generate_events(self, switch_mode, args): # Regular run mode logger.info('Generating %s events with run name %s' % (self.run_card['nevents'], self.run_name)) - self.exec_cmd('survey %s %s' % (self.run_name,' '.join(args)), postcmd=False) nb_event = self.run_card['nevents'] @@ -2975,7 +2977,7 @@ def update_width_in_param_card(decay_info, initial=None, output=None): particle = 0 # Read BRs for this decay line = param_card[line_number] - while re.search('^(#|\s|\d)', line): + while re.search(r'^(#|\s|\d)', line): line = param_card.pop(line_number) if not particle or line.startswith('#'): line=param_card[line_number] @@ -3226,7 +3228,7 @@ def do_treatcards(self, line, mode=None, opt=None): for line in open(pjoin(bias_module_path,'%s.f'%os.path.basename(bias_module_path))): if start and last: break - if not start and not re.search('c\s*parameters\s*=\s*{',line, re.I): + if not start and not re.search(r'c\s*parameters\s*=\s*{',line, re.I): continue start = True if not line.startswith('C'): @@ -3235,7 +3237,7 @@ def do_treatcards(self, line, mode=None, opt=None): if '{' in line: line = line.split('{')[-1] # split for } ! # - split_result = re.split('(\}|!|\#)', line,1, re.M) + split_result = re.split(r'(\}|!|\#)', line,1, re.M) line = split_result[0] sep = split_result[1] if len(split_result)>1 else None if sep == '}': @@ -3514,8 +3516,8 @@ def pass_in_difficult_integration_mode(self, rate=1): text = open(conf_path).read() min_evt, max_evt = 2500 *(2+rate), 10000*(rate+1) - text = re.sub('''\(min_events = \d+\)''', '(min_events = %i )' % min_evt, text) - text = re.sub('''\(max_events = \d+\)''', '(max_events = %i )' % max_evt, text) + text = re.sub(r'''\(min_events = \d+\)''', '(min_events = %i )' % min_evt, text) + text = re.sub(r'''\(max_events = \d+\)''', '(max_events = %i )' % max_evt, text) fsock = open(conf_path, 'w') fsock.write(text) fsock.close() @@ -3619,7 +3621,7 @@ def do_refine(self, line): alljobs = misc.glob('ajob*', Pdir) #remove associated results.dat (ensure to not mix with all data) - Gre = re.compile("\s*j=(G[\d\.\w]+)") + Gre = re.compile(r"\s*j=(G[\d\.\w]+)") for job in alljobs: Gdirs = Gre.findall(open(job).read()) for Gdir in Gdirs: @@ -3727,58 +3729,126 @@ def do_combine_events(self, line): sum_xsec, sum_xerru, sum_axsec = 0,[],0 Gdirs = self.get_Gdir() Gdirs.sort() - for Gdir in Gdirs: - if os.path.exists(pjoin(Gdir, 'events.lhe')): - result = sum_html.OneResult('') - result.read_results(pjoin(Gdir, 'results.dat')) - sum_xsec += result.get('xsec') - sum_xerru.append(result.get('xerru')) - sum_axsec += result.get('axsec') - - if self.run_card['gridpack'] or self.run_card['nevents']==0: - os.remove(pjoin(Gdir, 'events.lhe')) - continue + partials_info = [] + try: + p = subprocess.Popen(["ulimit", "-n"], stdout=subprocess.PIPE) + out, err = p.communicate() + max_G = out.decode() + if max_G == "unlimited": + max_G =2500 + else: + max_G = int(max_G) - 40 + except Exception as error: + logger.debug(error) + max_G = 80 # max(20, len(Gdirs)/self.options['nb_core']) - AllEvent.add(pjoin(Gdir, 'events.lhe'), - result.get('xsec'), - result.get('xerru'), - result.get('axsec') - ) - - if len(AllEvent) >= 80: #perform a partial unweighting - AllEvent.unweight(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % partials), - 
get_wgt, log_level=5, trunc_error=1e-2, event_target=self.run_card['nevents']) - AllEvent = lhe_parser.MultiEventFile() - AllEvent.banner = self.banner - AllEvent.add(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % partials), - sum_xsec, - math.sqrt(sum(x**2 for x in sum_xerru)), - sum_axsec) - partials +=1 - if not hasattr(self,'proc_characteristic'): self.proc_characteristic = self.get_characteristics() - if len(AllEvent) == 0: - nb_event = 0 - else: + mycluster = cluster.MultiCore(nb_core=self.options['nb_core']) + + def split(a, n): + """split a list "a" into n chunk of same size (or nearly same size)""" + k, m = divmod(len(a), n) + return (a[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n)) + + partials_info = [] + if len(Gdirs) >= max_G: + start_unweight= time.perf_counter() + # first check in how many chunk we have to split (always use a multiple of nb_core) + nb_split = 1 + nb_G = len(Gdirs) // (2* self.options['nb_core']) + while nb_G > min(80, max_G): + nb_split += 1 + nb_G = len(Gdirs)//(nb_split*2*self.options['nb_core']) + if nb_G < 10: + nb_split -=1 + nb_G = len(Gdirs)//(nb_split*2*self.options['nb_core']) + + #enforce at least 10 directory per thread + if nb_G > 10 or nb_split>1: + # do the unweighting of each chunk on their own thread + nb_chunk = (nb_split*2*self.options['nb_core']) + else: + nb_chunk = len(Gdirs) // 10 + nb_G =10 + + # security that the number of combine events is too large + if nb_chunk >= max_G: + nb_chunk = max_G -1 + nb_G = len(Gdirs) // nb_chunk + + for i, local_G in enumerate(split(Gdirs, nb_chunk)): + line = [pjoin(self.me_dir, "Events", self.run_name, "partials%d.lhe.gz" % i)] + line.append(pjoin(self.me_dir, 'Events', self.run_name, '%s_%s_banner.txt' % (self.run_name, tag))) + line.append(str(self.results.current['cross'])) + line += local_G + partials_info.append(self.do_combine_events_partial(' '.join(line), preprocess_only=True)) + mycluster.submit(sys.executable, + [pjoin(self.me_dir, 'bin', 'internal', 'madevent_interface.py'), 'combine_events_partial'] + line, + stdout='/dev/null' + ) + + starttime = time.time() + update_status = lambda idle, run, finish: \ + self.update_status((idle, run, finish, 'unweight'), level=None, + force=False, starttime=starttime) + mycluster.wait(self.me_dir, update_status) + # do the final combination + for data in partials_info: + AllEvent.add(*data) + + start_unweight= time.perf_counter() nb_event = AllEvent.unweight(pjoin(self.me_dir, "Events", self.run_name, "unweighted_events.lhe.gz"), get_wgt, trunc_error=1e-2, event_target=self.run_card['nevents'], log_level=logging.DEBUG, normalization=self.run_card['event_norm'], proc_charac=self.proc_characteristic) + + #cleaning + for data in partials_info: + path = data[0] + try: + os.remove(path) + except Exception as error: + try: + os.remove(path[:-3]) # try without the .gz + except: + misc.sprint('no file ', path, 'to clean') + else: + for Gdir in Gdirs: + if os.path.exists(pjoin(Gdir, 'events.lhe')): + result = sum_html.OneResult('') + result.read_results(pjoin(Gdir, 'results.dat')) + sum_xsec += result.get('xsec') + sum_xerru.append(result.get('xerru')) + sum_axsec += result.get('axsec') + + if self.run_card['gridpack'] or self.run_card['nevents']==0: + os.remove(pjoin(Gdir, 'events.lhe')) + continue + + AllEvent.add(pjoin(Gdir, 'events.lhe'), + result.get('xsec'), + result.get('xerru'), + result.get('axsec') + ) + + if len(AllEvent) == 0: + nb_event = 0 + else: + nb_event = AllEvent.unweight(pjoin(self.me_dir, "Events", self.run_name, 
"unweighted_events.lhe.gz"), + get_wgt, trunc_error=1e-2, event_target=self.run_card['nevents'], + log_level=logging.DEBUG, normalization=self.run_card['event_norm'], + proc_charac=self.proc_characteristic) + + if nb_event < self.run_card['nevents']: + logger.warning("failed to generate enough events. Please follow one of the following suggestions to fix the issue:") + logger.warning(" - set in the run_card.dat 'sde_strategy' to %s", 1 + self.run_card['sde_strategy'] % 2) + logger.warning(" - set in the run_card.dat 'hard_survey' to 1 or 2.") + logger.warning(" - reduce the number of requested events (if set too high)") + logger.warning(" - check that you do not have -integrable- singularity in your amplitude.") + logger.warning(" - regenerate your process directory by selecting another gauge (in particular try FD gauge).") - if nb_event < self.run_card['nevents']: - logger.warning("failed to generate enough events. Please follow one of the following suggestions to fix the issue:") - logger.warning(" - set in the run_card.dat 'sde_strategy' to %s", 1 + self.run_card['sde_strategy'] % 2) - logger.warning(" - set in the run_card.dat 'hard_survey' to 1 or 2.") - logger.warning(" - reduce the number of requested events (if set too high)") - logger.warning(" - check that you do not have -integrable- singularity in your amplitude.") - if partials: - for i in range(partials): - try: - os.remove(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % i)) - except Exception: - os.remove(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe" % i)) self.results.add_detail('nb_event', nb_event) @@ -3789,7 +3859,50 @@ def do_combine_events(self, line): logger.info("combination of events done in %s s ", time.time()-start) self.to_store.append('event') + + ############################################################################ + def do_combine_events_partial(self, line, preprocess_only=False): + """ """ + + AllEvent = lhe_parser.MultiEventFile() + + sum_xsec, sum_xerru, sum_axsec = 0,[],0 + output, banner_path,cross = line.split()[:3] + Gdirs = line.split()[3:] + + cross = float(cross) + if not self.banner: + self.banner = banner_mod.Banner(banner_path) + if not hasattr(self, 'run_card'): + self.run_card = banner_mod.RunCard(self.banner['mgruncard']) + AllEvent.banner = self.banner + + for Gdir in Gdirs: + if os.path.exists(pjoin(Gdir, 'events.lhe')): + result = sum_html.OneResult('') + result.read_results(pjoin(Gdir, 'results.dat')) + sum_xsec += result.get('xsec') + sum_xerru.append(result.get('xerru')) + sum_axsec += result.get('axsec') + + if self.run_card['gridpack'] or self.run_card['nevents']==0: + os.remove(pjoin(Gdir, 'events.lhe')) + continue + if not preprocess_only: + AllEvent.add(pjoin(Gdir, 'events.lhe'), + result.get('xsec'), + result.get('xerru'), + result.get('axsec') + ) + if preprocess_only: + return output, sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), sum_axsec + nb_event = max(min(abs(1.01*self.run_card['nevents']*sum_axsec/cross),self.run_card['nevents']), 10) + get_wgt = lambda event: event.wgt + AllEvent.unweight(output, + get_wgt, log_level=5, trunc_error=1e-2, event_target=nb_event) + return output, sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), sum_axsec + ############################################################################ def correct_bias(self): """check the first event and correct the weight by the bias @@ -3902,13 +4015,19 @@ def do_store_events(self, line): #except Exception: # continue # Store log - try: - if 
os.path.exists(pjoin(G_path, 'log.txt')): - input = pjoin(G_path, 'log.txt') + input = pjoin(G_path, 'log.txt') + if os.path.exists(input): + if self.run_card['keep_log'] not in ["none", "minimal"]: output = pjoin(G_path, '%s_log.txt' % run) - files.mv(input, output) - except Exception: - continue + try: + files.mv(input, output) + except Exception: + continue + elif self.run_card['keep_log'] == "none": + try: + os.remove(input) + except Exception: + continue #try: # # Grid # for name in ['ftn26']: @@ -3989,7 +4108,7 @@ def do_create_gridpack(self, line): misc.call(['./bin/internal/make_gridpack'], cwd=self.me_dir) files.mv(pjoin(self.me_dir, 'gridpack.tar.gz'), pjoin(self.me_dir, '%s_gridpack.tar.gz' % self.run_name)) - os.system("sed -i.bak \"s/\s*.true.*=.*GridRun/ .false. = GridRun/g\" %s/Cards/grid_card.dat" \ + os.system("sed -i.bak \"s/\\s*.true.*=.*GridRun/ .false. = GridRun/g\" %s/Cards/grid_card.dat" \ % self.me_dir) self.update_status('gridpack created', level='gridpack') @@ -4476,7 +4595,7 @@ def do_pythia8(self, line): else: preamble = misc.get_HEPTools_location_setter( pjoin(MG5DIR,'HEPTools'),'lib') - preamble += "\n unset PYTHIA8DATA\n" + #preamble += "\n unset PYTHIA8DATA\n" open(pythia_cmd_card,'w').write("""! ! It is possible to run this card manually with: @@ -4691,7 +4810,7 @@ def do_pythia8(self, line): # Make sure to sure the number of split_events determined during the splitting. split_PY8_Card.systemSet('Main:numberOfEvents',partition_for_PY8[i]) split_PY8_Card.systemSet('HEPMCoutput:scaling',split_PY8_Card['HEPMCoutput:scaling']* - (float(partition_for_PY8[i])/float(n_events))) + (float(partition_for_PY8[i]))) # Add_missing set to False so as to be sure not to add any additional parameter w.r.t # the ones in the original PY8 param_card copied. split_PY8_Card.write(pjoin(parallelization_dir,'PY8Card_%d.dat'%i), @@ -4963,9 +5082,9 @@ def wait_monitoring(Idle, Running, Done): if cross_sections: # Filter the cross_sections specified an keep only the ones # with central parameters and a different merging scale - a_float_re = '[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' + a_float_re = r'[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' central_merging_re = re.compile( - '^\s*Weight_MERGING\s*=\s*(?P%s)\s*$'%a_float_re, + r'^\s*Weight_MERGING\s*=\s*(?P%s)\s*$'%a_float_re, re.IGNORECASE) cross_sections = dict( (float(central_merging_re.match(xsec).group('merging')),value) @@ -5016,8 +5135,8 @@ def wait_monitoring(Idle, Running, Done): def parse_PY8_log_file(self, log_file_path): """ Parse a log file to extract number of event and cross-section. 
""" - pythiare = re.compile("Les Houches User Process\(es\)\s*\d+\s*\|\s*(?P\d+)\s*(?P\d+)\s*(?P\d+)\s*\|\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") - pythia_xsec_re = re.compile("Inclusive cross section\s*:\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") + pythiare = re.compile(r"Les Houches User Process\(es\)\s*\d+\s*\|\s*(?P\d+)\s*(?P\d+)\s*(?P\d+)\s*\|\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") + pythia_xsec_re = re.compile(r"Inclusive cross section\s*:\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") sigma_m, Nacc, Ntry = None, None, None for line in misc.BackRead(log_file_path): info = pythiare.search(line) @@ -5158,7 +5277,7 @@ def do_pythia(self, line): # read the line from the bottom of the file #pythia_log = misc.BackRead(pjoin(self.me_dir,'Events', self.run_name, # '%s_pythia.log' % tag)) - pythiare = re.compile("\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") + pythiare = re.compile(r"\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") for line in misc.reverse_readline(pjoin(self.me_dir,'Events', self.run_name, '%s_pythia.log' % tag)): info = pythiare.search(line) @@ -5619,8 +5738,8 @@ def launch_job(self,exe, cwd=None, stdout=None, argument = [], remaining=0, input_files.append(self.get_pdf_input_filename()) #Find the correct ajob - Gre = re.compile("\s*j=(G[\d\.\w]+)") - origre = re.compile("grid_directory=(G[\d\.\w]+)") + Gre = re.compile(r"\s*j=(G[\d\.\w]+)") + origre = re.compile(r"grid_directory=(G[\d\.\w]+)") try : fsock = open(exe) except Exception: @@ -5628,7 +5747,7 @@ def launch_job(self,exe, cwd=None, stdout=None, argument = [], remaining=0, text = fsock.read() output_files = Gre.findall(text) if not output_files: - Ire = re.compile("for i in ([\d\.\s]*) ; do") + Ire = re.compile(r"for i in ([\d\.\s]*) ; do") data = Ire.findall(text) data = ' '.join(data).split() for nb in data: @@ -6035,7 +6154,7 @@ def get_last_tag(self, level): 'syscalc':[], 'rivet':['rivet']} - if name == self.run_name: + if name and name == self.run_name: if reload_card: run_card = pjoin(self.me_dir, 'Cards','run_card.dat') self.run_card = banner_mod.RunCard(run_card) @@ -6334,7 +6453,7 @@ def run_syscalc(self, mode='parton', event_path=None, output=None): elif mode == 'Pythia': stdout = open(pjoin(event_dir, self.run_name, '%s_%s_syscalc.log' % (tag,mode)),'w') if 'mgpythiacard' in self.banner: - pat = re.compile('''^\s*qcut\s*=\s*([\+\-\d.e]*)''', re.M+re.I) + pat = re.compile(r'''^\s*qcut\s*=\s*([\+\-\d.e]*)''', re.M+re.I) data = pat.search(self.banner['mgpythiacard']) if data: qcut = float(data.group(1)) @@ -6611,7 +6730,7 @@ def get_subP_ids(path): for line in open(pjoin(path, 'leshouche.inc')): if not 'IDUP' in line: continue - particles = re.search("/([\d,-]+)/", line) + particles = re.search(r"/([\d,-]+)/", line) all_ids.append([int(p) for p in particles.group(1).split(',')]) return all_ids @@ -6899,6 +7018,7 @@ def do_combine_events(self, line): partials = 0 # if too many file make some partial unweighting sum_xsec, sum_xerru, sum_axsec = 0,[],0 + partials_info = [] Gdirs = self.get_Gdir() Gdirs.sort() for Gdir in Gdirs: @@ -6917,16 +7037,21 @@ def do_combine_events(self, line): sum_axsec += result.get('axsec')*gscalefact[Gdir] if len(AllEvent) >= 80: #perform a partial unweighting + nb_event = min(abs(1.01*self.nb_event*sum_axsec/self.results.current['cross']),self.run_card['nevents']) AllEvent.unweight(pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), - get_wgt, log_level=5, trunc_error=1e-2, event_target=self.nb_event) + 
get_wgt, log_level=5, trunc_error=1e-2, event_target=nb_event) AllEvent = lhe_parser.MultiEventFile() AllEvent.banner = self.banner - AllEvent.add(pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), + partials_info.append((pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), - sum_axsec) + sum_axsec) ) + sum_xsec, sum_xerru, sum_axsec = 0,[],0 partials +=1 + for data in partials_info: + AllEvent.add(*data) + if not hasattr(self,'proc_characteristic'): self.proc_characteristic = self.get_characteristics() diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/misc.py b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/misc.py index c4c669f36b..e7fd60be0d 100755 --- a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/misc.py +++ b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/misc.py @@ -67,7 +67,7 @@ def parse_info_str(fsock): """ info_dict = {} - pattern = re.compile("(?P\w*)\s*=\s*(?P.*)", + pattern = re.compile(r"(?P\w*)\s*=\s*(?P.*)", re.IGNORECASE | re.VERBOSE) for entry in fsock: entry = entry.strip() @@ -84,7 +84,7 @@ def parse_info_str(fsock): def glob(name, path=''): """call to glob.glob with automatic security on path""" import glob as glob_module - path = re.sub('(?P\?|\*|\[|\])', '[\g]', path) + path = re.sub(r'(?P\?|\*|\[|\])', r'[\g]', path) return glob_module.glob(pjoin(path, name)) #=============================================================================== @@ -614,10 +614,10 @@ def mod_compilator(directory, new='gfortran', current=None, compiler_type='gfort #search file file_to_change=find_makefile_in_dir(directory) if compiler_type == 'gfortran': - comp_re = re.compile('^(\s*)FC\s*=\s*(.+)\s*$') + comp_re = re.compile(r'^(\s*)FC\s*=\s*(.+)\s*$') var = 'FC' elif compiler_type == 'cpp': - comp_re = re.compile('^(\s*)CXX\s*=\s*(.+)\s*$') + comp_re = re.compile(r'^(\s*)CXX\s*=\s*(.+)\s*$') var = 'CXX' else: MadGraph5Error, 'Unknown compiler type: %s' % compiler_type @@ -861,9 +861,9 @@ def detect_current_compiler(path, compiler_type='fortran'): # comp = re.compile("^\s*FC\s*=\s*(\w+)\s*") # The regular expression below allows for compiler definition with absolute path if compiler_type == 'fortran': - comp = re.compile("^\s*FC\s*=\s*([\w\/\\.\-]+)\s*") + comp = re.compile("^\\s*FC\\s*=\\s*([\\w\\/\\.\\-]+)\\s*") elif compiler_type == 'cpp': - comp = re.compile("^\s*CXX\s*=\s*([\w\/\\.\-]+)\s*") + comp = re.compile("^\\s*CXX\\s*=\\s*([\\w\\/\\.\\-]+)\\s*") else: MadGraph5Error, 'Unknown compiler type: %s' % compiler_type @@ -1001,7 +1001,19 @@ def call_stdout(arg, *args, **opt): def copytree(src, dst, symlinks = False, ignore = None): if not os.path.exists(dst): os.makedirs(dst) - shutil.copystat(src, dst) + try: + shutil.copystat(src, dst) + except PermissionError: + if os.path.realpath(src).startswith('/cvmfs') and os.path.realpath(dst).startswith('/afs'): + # allowing missmatch from cvmfs to afs since sounds to not create issue --at least in general-- + logger.critical(f'Ignoring that we could not copy permissions from {src} to {dst}') + else: + logger.critical(f'Permission error detected from {src} to {dst}.\n'+\ + 'If you are using WSL with windows partition, please try using python3.12\n'+\ + 'or avoid moving your data from the WSL partition to the UNIX one') + # we do not have enough experience in WSL to allow it to get trough. + raise + lst = os.listdir(src) if ignore: excl = ignore(src, lst) @@ -1895,12 +1907,12 @@ class EasterEgg(object): May4_banner = "* _____ *\n" + \ "* ,-~\" \"~-. *\n" + \ "* * ,^ ___ ^. 
* *\n" + \ - "* * / .^ ^. \ * *\n" + \ + "* * / .^ ^. \\ * *\n" + \ "* * Y l o ! Y * *\n" + \ "* * l_ `.___.' _,[ * *\n" + \ "* * |^~\"--------------~\"\"^| * *\n" + \ "* * ! May the 4th ! * *\n" + \ - "* * \ / * *\n" + \ + "* * \\ / * *\n" + \ "* * ^. .^ * *\n" + \ "* * \"-.._____.,-\" * *\n" @@ -1909,13 +1921,13 @@ class EasterEgg(object): "* M::::::::::M M::::::::::M *\n" + \ "* M:::::::::::M M:::::::::::M (_)___ *\n" + \ "* M:::::::M::::M M::::M:::::::M | / __| *\n" + \ - "* M::::::M M::::M M::::M M::::::M | \__ \ *\n" + \ + "* M::::::M M::::M M::::M M::::::M | \\__ \\ *\n" + \ "* M::::::M M::::M::::M M::::::M |_|___/ *\n" + \ "* M::::::M M:::::::M M::::::M *\n" + \ "* M::::::M M:::::M M::::::M / _| ___ _ __ *\n" + \ - "* M::::::M MMMMM M::::::M | |_ / _ \| '__| *\n" + \ + "* M::::::M MMMMM M::::::M | |_ / _ \\| '__| *\n" + \ "* M::::::M M::::::M | _| (_) | | *\n" + \ - "* M::::::M M::::::M |_/\/\___/|_| *\n" + \ + "* M::::::M M::::::M |_/\\/\\___/|_| *\n" + \ "* M::::::M M::::::M *\n" + \ "* MMMMMMMM MMMMMMMM *\n" + \ "* *\n" + \ @@ -2233,39 +2245,51 @@ def import_python_lhapdf(lhapdfconfig): os.environ['LD_LIBRARY_PATH'] = lhapdf_libdir else: os.environ['LD_LIBRARY_PATH'] = '%s:%s' %(lhapdf_libdir,os.environ['LD_LIBRARY_PATH']) - try: candidates=[dirname for dirname in os.listdir(lhapdf_libdir) \ if os.path.isdir(os.path.join(lhapdf_libdir,dirname))] except OSError: candidates=[] + if os.path.isdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')): + candidates += [pjoin(os.pardir,'local', 'lib', dirname) for dirname in os.listdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')) + if os.path.isdir(os.path.join(lhapdf_libdir,os.pardir, 'local', 'lib', dirname))] for candidate in candidates: - if os.path.isdir(os.path.join(lhapdf_libdir,candidate,'site-packages')): - sys.path.insert(0,os.path.join(lhapdf_libdir,candidate,'site-packages')) - try: - import lhapdf - use_lhapdf=True - break - except ImportError: - sys.path.pop(0) - continue + for subdir in ['site-packages', 'dist-packages']: + if os.path.isdir(os.path.join(lhapdf_libdir,candidate, subdir)): + sys.path.insert(0,os.path.join(lhapdf_libdir,candidate, subdir)) + try: + import lhapdf + use_lhapdf=True + break + except ImportError as error: + sys.path.pop(0) + continue + else: + continue + break if not use_lhapdf: try: candidates=[dirname for dirname in os.listdir(lhapdf_libdir+'64') \ if os.path.isdir(os.path.join(lhapdf_libdir+'64',dirname))] except OSError: candidates=[] - + if os.path.isdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib64')): + candidates += [pjoin(os.pardir,'local', 'lib64', dirname) for dirname in os.listdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')) + if os.path.isdir(os.path.join(lhapdf_libdir,os.pardir, 'local', 'lib64', dirname))] for candidate in candidates: - if os.path.isdir(os.path.join(lhapdf_libdir+'64',candidate,'site-packages')): - sys.path.insert(0,os.path.join(lhapdf_libdir+'64',candidate,'site-packages')) - try: - import lhapdf - use_lhapdf=True - break - except ImportError: - sys.path.pop(0) - continue + for subdir in ['site-packages', 'dist-packages']: + if os.path.isdir(os.path.join(lhapdf_libdir+'64',candidate, subdir)): + sys.path.insert(0,os.path.join(lhapdf_libdir+'64',candidate, subdir)) + try: + import lhapdf + use_lhapdf=True + break + except ImportError as error: + sys.path.pop(0) + continue + else: + continue + break if not use_lhapdf: try: import lhapdf diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/shower_card.py 
b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/shower_card.py index e87d534177..16ed72b1a0 100755 --- a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/shower_card.py +++ b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/shower_card.py @@ -288,7 +288,7 @@ def write(self, output_file, template=None, python_template=False, self.text = open(template,'r').read() - key_re = re.compile('^(\s*)([\S^#]+)(\s*)=(\s*)([^#]*?)(\s*)(\#.*|$)') + key_re = re.compile(r'^(\s*)([\S^#]+)(\s*)=(\s*)([^#]*?)(\s*)(\#.*|$)') newlines = [] for line in self.text.split('\n'): key_match = key_re.findall(line) diff --git a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/systematics.py b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/systematics.py index 28eaed00e2..2acebd087c 100644 --- a/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/systematics.py +++ b/epochX/cudacpp/nobm_pp_ttW.mad/bin/internal/systematics.py @@ -582,7 +582,7 @@ def print_cross_sections(self, all_cross, nb_event, stdout): else: resume.write( '#PDF %s: %g +%2.3g%% -%2.3g%%\n' % (pdfset.name, pdferr.central,pdferr.errplus*100/all_cross[0], pdferr.errminus*100/all_cross[0])) - dyn_name = {1: '\sum ET', 2:'\sum\sqrt{m^2+pt^2}', 3:'0.5 \sum\sqrt{m^2+pt^2}',4:'\sqrt{\hat s}' } + dyn_name = {1: r'\sum ET', 2:r'\sum\sqrt{m^2+pt^2}', 3:r'0.5 \sum\sqrt{m^2+pt^2}',4:r'\sqrt{\hat s}' } for key, curr in dyns.items(): if key ==-1: continue @@ -789,7 +789,7 @@ def get_id(self): return int(self.start_wgt_id) if 'initrwgt' in self.banner: - pattern = re.compile(' set lhapdf /PATH/TO/lhapdf-config -None does not seem to correspond to a valid lhapdf-config executable. -Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). -Note that you can still compile and run aMC@NLO with the built-in PDFs - MG5_aMC> set lhapdf /PATH/TO/lhapdf-config - Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -61,7 +56,7 @@ set zerowidth_tchannel F define j = p INFO: load particles INFO: load vertices -DEBUG: model prefixing takes 0.005425453186035156  +DEBUG: model prefixing takes 0.005753755569458008  INFO: Restrict model sm with file models/sm/restrict_default.dat . DEBUG: Simplifying conditional expressions  DEBUG: remove interactions: u s w+ at order: QED=1  @@ -172,7 +167,7 @@ INFO: Process u~ u > t t~ added to mirror process u u~ > t t~ INFO: Process c~ c > t t~ added to mirror process c c~ > t t~ INFO: Process d~ d > t t~ added to mirror process d d~ > t t~ INFO: Process s~ s > t t~ added to mirror process s s~ > t t~ -5 processes with 7 diagrams generated in 0.029 s +5 processes with 7 diagrams generated in 0.030 s Total: 5 processes with 7 diagrams add process p p > t t~ j @1 INFO: Checking for minimal orders which gives processes. 
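(One editorial note on the recurring pattern in the Python hunks above, in misc.py, madevent_interface.py, shower_card.py and systematics.py: regex literals are being converted to raw strings, or in detect_current_compiler to hand-doubled backslashes. An un-prefixed "\s" is an invalid string escape; it used to work silently, but Python 3.12 promotes the old DeprecationWarning to a SyntaxWarning that is visible at compile time, with a hard error planned for a later release. A minimal illustration with a simplified pattern:)

    import re

    # Both spellings compile to the same pattern; the raw string is the
    # readable fix used in most hunks, the doubled backslashes match the
    # detect_current_compiler style.  The un-prefixed original would warn
    # on Python 3.12+.
    pat_doubled = re.compile("^\\s*FC\\s*=\\s*(.+?)\\s*$")
    pat_raw = re.compile(r"^\s*FC\s*=\s*(.+?)\s*$")

    line = "  FC = gfortran"
    assert pat_doubled.match(line).group(1) == "gfortran"
    assert pat_raw.match(line).group(1) == "gfortran"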
@@ -212,7 +207,7 @@ INFO: Process d~ g > t t~ d~ added to mirror process g d~ > t t~ d~ INFO: Process d~ d > t t~ g added to mirror process d d~ > t t~ g INFO: Process s~ g > t t~ s~ added to mirror process g s~ > t t~ s~ INFO: Process s~ s > t t~ g added to mirror process s s~ > t t~ g -13 processes with 76 diagrams generated in 0.139 s +13 processes with 76 diagrams generated in 0.141 s Total: 18 processes with 83 diagrams add process p p > t t~ j j @2 INFO: Checking for minimal orders which gives processes. @@ -378,24 +373,24 @@ INFO: Process s~ u~ > t t~ u~ s~ added to mirror process u~ s~ > t t~ u~ s~ INFO: Process s~ c~ > t t~ c~ s~ added to mirror process c~ s~ > t t~ c~ s~ INFO: Process s~ d~ > t t~ d~ s~ added to mirror process d~ s~ > t t~ d~ s~ INFO: Crossed process found for s~ s~ > t t~ s~ s~, reuse diagrams. -65 processes with 1119 diagrams generated in 1.859 s +65 processes with 1119 diagrams generated in 1.869 s Total: 83 processes with 1202 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_pp_tt012j --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT -Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.5.3_lo_vect. +Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT +DEBUG: opt['output_options']['vector_size'] =  32 [export_v4.py at line 4334]  Output will be done with PLUGIN: CUDACPP_OUTPUT -DEBUG: cformat =  standalone_simd [export_cpp.py at line 3070]  DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 165]  INFO: initialize a new directory: CODEGEN_mad_pp_tt012j INFO: remove old information in CODEGEN_mad_pp_tt012j DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Processing color information for process: g g > t t~ g g @2 @@ -504,13 +499,8 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 0, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 0, 46, 47, 48, 49, 50, 51, 52, 53, 54, 0, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 0, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 0, 88, 89, 90, 91, 92, 93, 0, 94, 95, 96, 97, 98, 99, 0, 100, 101, 102, 103, 104, 105, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] [export_cpp.py at line 711]  -DEBUG: subproc_number =  0 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g g > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxgg -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxgg [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  105 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 2, 2: 3, 3: 4, 4: 5, 5: 6, 6: 7, 7: 8, 8: 9, 9: 10, 10: 11, 11: 12, 12: 13, 13: 14, 14: 15, 15: 16, 16: 17, 17: 18, 18: 19, 19: 20, 20: 21, 21: 22, 22: 23, 23: 24, 24: 25, 25: 26, 26: 27, 27: 28, 28: 29, 29: 30, 30: 31, 31: 33, 32: 34, 33: 35, 34: 36, 35: 37, 36: 38, 37: 39, 38: 40, 39: 41, 40: 42, 41: 43, 42: 44, 43: 45, 44: 46, 45: 47, 46: 49, 47: 50, 48: 51, 49: 52, 50: 53, 51: 54, 52: 55, 53: 56, 54: 57, 55: 59, 56: 60, 57: 61, 58: 62, 59: 63, 60: 64, 61: 65, 62: 66, 63: 67, 64: 68, 65: 69, 66: 70, 67: 71, 68: 72, 69: 73, 70: 75, 71: 76, 72: 77, 73: 78, 74: 79, 75: 80, 76: 81, 77: 82, 78: 83, 79: 84, 80: 85, 81: 86, 82: 87, 83: 88, 84: 89, 85: 90, 86: 91, 87: 92, 88: 94, 89: 95, 90: 96, 91: 97, 92: 98, 93: 99, 94: 101, 95: 102, 96: 103, 97: 104, 98: 105, 99: 106, 100: 108, 101: 109, 102: 110, 103: 111, 104: 112, 105: 113} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {2: 1, 3: 2, 4: 3, 5: 4, 6: 5, 7: 6, 8: 7, 9: 8, 10: 9, 11: 10, 12: 11, 13: 12, 14: 13, 15: 14, 16: 15, 17: 16, 18: 17, 19: 18, 20: 19, 21: 20, 22: 21, 23: 22, 24: 23, 25: 24, 26: 25, 27: 26, 28: 27, 29: 28, 30: 29, 31: 30, 33: 31, 34: 32, 35: 33, 36: 34, 37: 35, 38: 36, 39: 37, 40: 38, 41: 39, 42: 40, 43: 41, 44: 42, 45: 43, 46: 44, 47: 45, 49: 46, 50: 47, 51: 48, 52: 49, 53: 50, 54: 51, 55: 52, 56: 53, 57: 54, 59: 55, 60: 56, 61: 57, 62: 58, 63: 59, 64: 60, 65: 61, 66: 62, 67: 63, 68: 64, 69: 65, 70: 66, 71: 67, 72: 68, 73: 69, 75: 70, 76: 71, 77: 72, 78: 73, 79: 74, 80: 75, 81: 76, 82: 77, 83: 78, 84: 79, 85: 80, 86: 81, 87: 82, 88: 83, 89: 84, 90: 85, 91: 86, 92: 87, 94: 88, 95: 89, 96: 90, 97: 91, 98: 92, 99: 93, 101: 94, 102: 95, 103: 96, 104: 97, 105: 98, 106: 99, 108: 100, 109: 101, 110: 102, 111: 103, 112: 104, 113: 105} [model_handling.py at line 1548]  @@ -520,13 +510,8 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  -DEBUG: subproc_number =  1 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g g > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gg_ttxuux -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxuux [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  35 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [model_handling.py at line 1548]  @@ -536,13 +521,8 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  -DEBUG: subproc_number =  2 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g u > t t~ g u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gu_ttxgu -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gu_ttxgu [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  35 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [model_handling.py at line 1548]  @@ -552,13 +532,8 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
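(Aside on the DEBUG dumps just above: iconfig_to_diag and diag_to_iconfig are the two directions of one mapping derived from the removed config_map lines. A hypothetical reconstruction follows -- the dict names are taken from the log, while the derivation rule is inferred: entries equal to 0 presumably mark diagrams without their own integration channel and are skipped, the rest are keyed by config number.)

    # config_map for gg_ttxuux as printed in the removed DEBUG line above
    config_map = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
                  19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
                  0, 34, 35]

    iconfig_to_diag = {}
    for diag, config in enumerate(config_map, start=1):
        if config:                       # skip diagrams mapped to config 0
            iconfig_to_diag[config] = diag
    diag_to_iconfig = {d: c for c, d in iconfig_to_diag.items()}

    assert iconfig_to_diag[33] == 33 and iconfig_to_diag[34] == 35
    assert diag_to_iconfig[36] == 35    # matches the dump printed above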
-DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  -DEBUG: subproc_number =  3 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ g u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group gux_ttxgux -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gux_ttxgux [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  35 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [model_handling.py at line 1548]  @@ -568,13 +543,8 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 0, 34, 35] [export_cpp.py at line 711]  -DEBUG: subproc_number =  4 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ g g WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxgg -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxgg [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  35 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 35, 35: 36} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 35: 34, 36: 35} [model_handling.py at line 1548]  @@ -584,13 +554,8 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0] [export_cpp.py at line 711]  -DEBUG: subproc_number =  5 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g g > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxg -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gg_ttxg [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  15 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15} [model_handling.py at line 1548]  @@ -600,13 +565,8 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  -DEBUG: subproc_number =  6 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: u u > t t~ u u WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uu_ttxuu -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uu_ttxuu [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  14 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1548]  @@ -616,13 +576,8 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  -DEBUG: subproc_number =  7 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ u u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxuux -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxuux [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  14 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1548]  @@ -632,13 +587,8 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] [export_cpp.py at line 711]  -DEBUG: subproc_number =  8 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: u~ u~ > t t~ u~ u~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxux_ttxuxux -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxux_ttxuxux [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  14 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14} [model_handling.py at line 1548]  @@ -648,13 +598,8 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  -DEBUG: subproc_number =  9 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: u c > t t~ u c WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uc_ttxuc -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uc_ttxuc [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  7 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1548]  @@ -664,13 +609,8 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  -DEBUG: subproc_number =  10 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ c c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uux_ttxccx -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxccx [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  7 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1548]  @@ -680,13 +620,8 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  -DEBUG: subproc_number =  11 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: u c~ > t t~ u c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group ucx_ttxucx -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_ucx_ttxucx [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  7 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1548]  @@ -696,13 +631,8 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7] [export_cpp.py at line 711]  -DEBUG: subproc_number =  12 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: u~ c~ > t t~ u~ c~ WEIGHTED<=4 @2 INFO: Finding symmetric diagrams for subprocess group uxcx_ttxuxcx -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxcx_ttxuxcx [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  7 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7} [model_handling.py at line 1548]  @@ -712,13 +642,8 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  -DEBUG: subproc_number =  13 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g u > t t~ u WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gu_ttxu -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gu_ttxu [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1548]  @@ -728,13 +653,8 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  -DEBUG: subproc_number =  14 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g u~ > t t~ u~ WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group gux_ttxux -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gux_ttxux [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1548]  @@ -744,13 +664,8 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2, 3, 4, 5] [export_cpp.py at line 711]  -DEBUG: subproc_number =  15 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ g WEIGHTED<=3 @1 INFO: Finding symmetric diagrams for subprocess group uux_ttxg -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_uux_ttxg [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5} [model_handling.py at line 1548]  @@ -760,13 +675,8 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  -DEBUG: subproc_number =  16 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group gg_ttx -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_gg_ttx [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  3 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1548]  @@ -776,32 +686,28 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1] [export_cpp.py at line 711]  -DEBUG: subproc_number =  17 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: u u~ > t t~ WEIGHTED<=2 INFO: Finding symmetric diagrams for subprocess group uux_ttx -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_uux_ttx [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  1 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1} [model_handling.py at line 1548]  Generated helas calls for 18 subprocesses (372 diagrams) in 1.318 s -Wrote files for 810 helas calls in 3.004 s +Wrote files for 810 helas calls in 2.818 s +DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 5 routines in 0.342 s +ALOHA: aloha creates 5 routines in 0.346 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV3 routines ALOHA: aloha creates VVVV4 routines -ALOHA: aloha creates 10 routines in 0.322 s +ALOHA: aloha creates 10 routines in 0.326 s VVV1 VVV1 FFV1 @@ -814,121 +720,175 @@ ALOHA: aloha creates 10 routines in 0.322 s VVVV3 VVVV4 VVVV4 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./HelAmps_sm.h -INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./HelAmps_sm.h +INFO: Created file HelAmps_sm.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/./Parameters_sm.cc INFO: Created files Parameters_sm.h and Parameters_sm.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. 
If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common -patching file Source/genps.inc +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file SubProcesses/makefile -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_uux_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #2 succeeded at 227 (offset 13 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P0_uux_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -Hunk #1 succeeded at 75 (offset 3 lines). -Hunk #2 succeeded at 236 (offset 3 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #1 succeeded at 74 (offset 3 lines). +Hunk #2 succeeded at 230 (offset 16 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gg_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -Hunk #2 succeeded at 249 (offset 16 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #2 succeeded at 243 (offset 29 lines). 
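The offsets reported by patch above are benign: the 3.6.0-generated driver.f and matrix1.f evidently carry extra lines ahead of each patched spot, so every hunk is matched by its context and applied a few lines later than recorded in patch.P1. One functional novelty of the regenerated driver.f (full diffs for P0_gg_ttx and P0_uux_ttx appear further below) is an init_mode block that freezes the renormalisation and factorisation scales, and disables ickkw, during the zero-helicity scan. A minimal runnable sketch of that pattern, with plain local variables standing in for the real run.inc common-block flags:

C     Sketch only: init_mode mirrors the driver.f logic shown below;
C     the scale flags and ickkw are local stand-ins, not run.inc.
      PROGRAM INITMODE
      IMPLICIT NONE
      LOGICAL INIT_MODE
      COMMON /TO_DETERMINE_ZERO_HEL/ INIT_MODE
      LOGICAL FIXED_REN_SCALE, FIXED_FAC_SCALE1, FIXED_FAC_SCALE2
      INTEGER ICKKW
      INIT_MODE = .TRUE.
      FIXED_REN_SCALE = .FALSE.
      FIXED_FAC_SCALE1 = .FALSE.
      FIXED_FAC_SCALE2 = .FALSE.
      ICKKW = 1
      IF (INIT_MODE) THEN
C       Freeze all scales so the scan for vanishing helicities is not
C       affected by event-by-event scale choices (an inference; the
C       diffs below add these assignments without comment).
        FIXED_REN_SCALE = .TRUE.
        FIXED_FAC_SCALE1 = .TRUE.
        FIXED_FAC_SCALE2 = .TRUE.
        ICKKW = 0
      ENDIF
      WRITE(*,*) 'init_mode =', INIT_MODE, ' ickkw =', ICKKW
      END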
+DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gu_ttxu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -Hunk #1 succeeded at 75 (offset 3 lines). -Hunk #2 succeeded at 252 (offset 19 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #1 succeeded at 74 (offset 3 lines). +Hunk #2 succeeded at 246 (offset 32 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_gux_ttxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -Hunk #1 succeeded at 75 (offset 3 lines). -Hunk #2 succeeded at 252 (offset 19 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_uux_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #1 succeeded at 74 (offset 3 lines). +Hunk #2 succeeded at 246 (offset 32 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P1_uux_ttxg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -Hunk #1 succeeded at 75 (offset 3 lines). -Hunk #2 succeeded at 252 (offset 19 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #1 succeeded at 74 (offset 3 lines). +Hunk #2 succeeded at 246 (offset 32 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -Hunk #2 succeeded at 281 (offset 48 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #2 succeeded at 275 (offset 61 lines). 
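The matrix1.f files being patched in this loop also pick up the regenerated good-helicity filter, and 3.6.0 changes where its threshold comes from: the genps.inc diff further below removes the compile-time PARAMETER, and limhel is instead read from the run_card (the run.inc and run_card.inc diffs add common /to_limhel/ and write LIMHEL = 0D0). The criterion itself, visible in the P0_gg_ttx matrix1.f hunks, keeps helicity I when DABS(TS(I)) exceeds ANS*LIMHEL/NCOMB. A runnable sketch with made-up matrix-element values; NCOMB=4 and LIMHEL=1D-8 (the historical Fortran default) are illustrative only:

C     Sketch of the good-helicity test; TS values are hypothetical.
      PROGRAM HELFILT
      IMPLICIT NONE
      INTEGER NCOMB, I, NGOODHEL
      PARAMETER (NCOMB=4)
      DOUBLE PRECISION TS(NCOMB), ANS, LIMHEL
      LOGICAL GOODHEL(NCOMB)
      DATA TS /0.9D0, 1.0D-12, 0.1D0, 0.0D0/
      LIMHEL = 1D-8
      ANS = 0D0
      DO I = 1, NCOMB
        ANS = ANS + TS(I)
      ENDDO
      NGOODHEL = 0
      DO I = 1, NCOMB
C       Same test as in matrix1.f: compare to ANS*LIMHEL/NCOMB
        GOODHEL(I) = DABS(TS(I)).GT.ANS*LIMHEL/NCOMB
        IF (GOODHEL(I)) NGOODHEL = NGOODHEL + 1
      ENDDO
      WRITE(*,*) 'NGOODHEL =', NGOODHEL, ' NCOMB =', NCOMB
      END

With the LIMHEL = 0 that run_card.inc writes below, the threshold vanishes and only exactly zero helicities are dropped, which is the "force Fortran to mimic cudacpp, see #419" behaviour that the removed genps.inc comment used to hard-code.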
+DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gg_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -Hunk #1 succeeded at 75 (offset 3 lines). -Hunk #2 succeeded at 284 (offset 51 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gu_ttxgu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #1 succeeded at 74 (offset 3 lines). +Hunk #2 succeeded at 278 (offset 64 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gu_ttxgu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -Hunk #1 succeeded at 75 (offset 3 lines). -Hunk #2 succeeded at 284 (offset 51 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gux_ttxgux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #1 succeeded at 74 (offset 3 lines). +Hunk #2 succeeded at 278 (offset 64 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_gux_ttxgux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -Hunk #1 succeeded at 75 (offset 3 lines). -Hunk #2 succeeded at 284 (offset 51 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uc_ttxuc; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #1 succeeded at 74 (offset 3 lines). +Hunk #2 succeeded at 278 (offset 64 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uc_ttxuc; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -Hunk #1 succeeded at 77 (offset 5 lines). -Hunk #2 succeeded at 286 (offset 53 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_ucx_ttxucx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #1 succeeded at 76 (offset 5 lines). +Hunk #2 succeeded at 280 (offset 66 lines). 
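A further 3.6.0 change that these subprocess directories inherit (spelled out in the auto_dsig.f and matrix1.f diffs below) is the good-helicity bookkeeping: GOODHEL and NTRY are now dimensioned by MAXSPROC from maxamps.inc rather than by a hard-coded mirror index of 2, matrix1.f indexes entry 1 instead of IMIRROR, and the element-wise DO-loop initialisation becomes whole-array assignments. A minimal sketch; the MAXSPROC and MAXTRIES values are placeholders, not those generated for this process:

C     Sketch of INIT_GOOD_HEL/READ_GOOD_HEL after the 3.6.0 change;
C     MAXSPROC and MAXTRIES values here are placeholders.
      PROGRAM GOODHELBK
      IMPLICIT NONE
      INTEGER NCOMB, MAXSPROC, MAXTRIES
      PARAMETER (NCOMB=16, MAXSPROC=2, MAXTRIES=25)
      LOGICAL GOODHEL(NCOMB, MAXSPROC)
      INTEGER NTRY(MAXSPROC)
C     INIT_GOOD_HEL: one whole-array assignment per array
      GOODHEL(:,:) = .FALSE.
      NTRY(:) = 0
C     READ_GOOD_HEL: mark every subprocess as past initialisation
      NTRY(:) = MAXTRIES + 1
      WRITE(*,*) 'NTRY =', NTRY
      END

The array syntax is behaviour-preserving; the substantive change is that the common block now scales with the number of subprocesses per directory instead of assuming exactly two mirror configurations.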
+DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_ucx_ttxucx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -Hunk #1 succeeded at 83 (offset 11 lines). -Hunk #2 succeeded at 292 (offset 59 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uu_ttxuu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #1 succeeded at 82 (offset 11 lines). +Hunk #2 succeeded at 286 (offset 72 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uu_ttxuu; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -Hunk #1 succeeded at 75 (offset 3 lines). -Hunk #2 succeeded at 284 (offset 51 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxccx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #1 succeeded at 74 (offset 3 lines). +Hunk #2 succeeded at 278 (offset 64 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxccx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -Hunk #1 succeeded at 83 (offset 11 lines). -Hunk #2 succeeded at 292 (offset 59 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #1 succeeded at 82 (offset 11 lines). +Hunk #2 succeeded at 286 (offset 72 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxgg; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -Hunk #1 succeeded at 75 (offset 3 lines). -Hunk #2 succeeded at 284 (offset 51 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #1 succeeded at 74 (offset 3 lines). +Hunk #2 succeeded at 278 (offset 64 lines). 
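Beyond the subprocess patches, the regenerated output also changes run-time beam handling: the setrun.f diff further below replaces the hard-coded idbmup(i) = ±2212 with logic that supports ion beams, keeping 2212/2112 for a single proton or neutron and otherwise emitting the standard PDG nuclear code ±10LZZZAAAI, i.e. 1000000000 + 10000*Z + 10*A with Z = nb_proton and A = nb_proton + nb_neutron. A sketch of that arithmetic with an illustrative Pb-208 beam (Z=82, N=126):

C     Sketch of the setrun.f beam-id logic; the beam composition
C     below is illustrative, not read from any card.
      PROGRAM BEAMID
      IMPLICIT NONE
      INTEGER NB_PROTON, NB_NEUTRON, IDBMUP
      NB_PROTON = 82
      NB_NEUTRON = 126
      IF (NB_PROTON.EQ.1 .AND. NB_NEUTRON.EQ.0) THEN
        IDBMUP = 2212
      ELSEIF (NB_PROTON.EQ.0 .AND. NB_NEUTRON.EQ.1) THEN
        IDBMUP = 2112
      ELSE
C       PDG ion code 10LZZZAAAI: 1e9 + 10000*Z + 10*A
        IDBMUP = 1000000000 + (NB_PROTON+NB_NEUTRON)*10
     $         + NB_PROTON*10000
      ENDIF
      WRITE(*,*) 'idbmup =', IDBMUP
      END

This prints 1000822080, the PDG code for a Pb-208 nucleus; for lpp = -1 or -2, setrun.f emits the same code with an overall minus sign.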
+DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uux_ttxuux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -Hunk #1 succeeded at 75 (offset 3 lines). -Hunk #2 succeeded at 284 (offset 51 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxcx_ttxuxcx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #1 succeeded at 74 (offset 3 lines). +Hunk #2 succeeded at 278 (offset 64 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxcx_ttxuxcx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -Hunk #1 succeeded at 77 (offset 5 lines). -Hunk #2 succeeded at 286 (offset 53 lines). -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxux_ttxuxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +Hunk #1 succeeded at 76 (offset 5 lines). +Hunk #2 succeeded at 280 (offset 66 lines). +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/SubProcesses/P2_uxux_ttxuxux; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -Hunk #1 succeeded at 75 (offset 3 lines). -Hunk #2 succeeded at 284 (offset 51 lines). +Hunk #1 succeeded at 74 (offset 3 lines). +Hunk #2 succeeded at 278 (offset 64 lines). DEBUG: p.returncode =  0 [output.py at line 258]  -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/README +/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/README Run "open index.html" to see more information about this process. quit -real 0m12.030s -user 0m9.841s -sys 0m0.932s -Code generation completed in 12 seconds +real 0m10.632s +user 0m9.727s +sys 0m0.868s +Code generation completed in 11 seconds ************************************************************ * * * W E L C O M E to * @@ -941,7 +901,7 @@ Code generation completed in 12 seconds * * * * * * * * * * * * -* VERSION 3.5.3_lo_vect * +* VERSION 3.6.0_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * @@ -949,9 +909,9 @@ Code generation completed in 12 seconds * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt @@ -971,7 +931,7 @@ launch in debug mode * * * * * * * * * * * * -* VERSION 3.5.3_lo_vect * +* VERSION 3.6.0_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * @@ -979,9 +939,9 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_pp_tt012j/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt diff --git a/epochX/cudacpp/pp_tt012j.mad/Cards/me5_configuration.txt b/epochX/cudacpp/pp_tt012j.mad/Cards/me5_configuration.txt index cdeedc7863..4f5079f78a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/pp_tt012j.mad/Cards/me5_configuration.txt @@ -116,6 +116,7 @@ # cluster_type = condor # cluster_queue = madgraph # cluster_size = 150 +# cluster_walltime = # time in minute for slurm and second for condor (not supported for other scheduller) #! Path to a node directory to avoid direct writing on the central disk #! 
Note that condor clusters avoid direct writing by default (therefore this @@ -234,7 +235,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/pp_tt012j.mad/Cards/param_card.dat b/epochX/cudacpp/pp_tt012j.mad/Cards/param_card.dat index caf4a67ea8..bb150ef10d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Cards/param_card.dat +++ b/epochX/cudacpp/pp_tt012j.mad/Cards/param_card.dat @@ -1,5 +1,5 @@ ###################################################################### -## PARAM_CARD AUTOMATICALY GENERATED BY MG5 FOLLOWING UFO MODEL #### +## PARAM_CARD AUTOMATICALLY GENERATED BY MG5 FOLLOWING UFO MODEL #### ###################################################################### ## ## ## Width set on Auto will be computed following the information ## diff --git a/epochX/cudacpp/pp_tt012j.mad/Cards/param_card_default.dat b/epochX/cudacpp/pp_tt012j.mad/Cards/param_card_default.dat index caf4a67ea8..bb150ef10d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Cards/param_card_default.dat +++ b/epochX/cudacpp/pp_tt012j.mad/Cards/param_card_default.dat @@ -1,5 +1,5 @@ ###################################################################### -## PARAM_CARD AUTOMATICALY GENERATED BY MG5 FOLLOWING UFO MODEL #### +## PARAM_CARD AUTOMATICALLY GENERATED BY MG5 FOLLOWING UFO MODEL #### ###################################################################### ## ## ## Width set on Auto will be computed following the information ## diff --git a/epochX/cudacpp/pp_tt012j.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/pp_tt012j.mad/Cards/proc_card_mg5.dat index 2f22f719bc..1f2f1a070e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/pp_tt012j.mad/Cards/proc_card_mg5.dat @@ -8,7 +8,7 @@ #* * * * #* * #* * -#* VERSION 3.5.3_lo_vect 2023-12-23 * +#* VERSION 3.6.0_lo_vect 2024-06-17 * #* * #* WARNING: UNKNOWN DEVELOPMENT VERSION. * #* WARNING: DO NOT USE FOR PRODUCTION * diff --git a/epochX/cudacpp/pp_tt012j.mad/Cards/reweight_card_default.dat b/epochX/cudacpp/pp_tt012j.mad/Cards/reweight_card_default.dat index ace534ae02..5cc65fe095 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Cards/reweight_card_default.dat +++ b/epochX/cudacpp/pp_tt012j.mad/Cards/reweight_card_default.dat @@ -19,8 +19,16 @@ change mode NLO # Define type of Reweighting. For LO sample this command # has no effect since only "LO" mode is allowed. + +#uncomment if you do not want to overwrite the reweight file of Sudakov in rw_me +#change rwgt_dir /PATH_MG5_BRANCH/NAME_FOLDER + +#uncomment if you want to use Sudakov Reweight +#change include_sudakov True + launch + # SPECIFY A PATH OR USE THE SET COMMAND LIKE THIS: # set sminputs 1 130 # modify 1/alpha_EW diff --git a/epochX/cudacpp/pp_tt012j.mad/Cards/run_card.dat b/epochX/cudacpp/pp_tt012j.mad/Cards/run_card.dat index 1a36d8a6aa..5eb60f35df 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Cards/run_card.dat +++ b/epochX/cudacpp/pp_tt012j.mad/Cards/run_card.dat @@ -203,6 +203,7 @@ systematics = systematics_program ! none, systematics [python], SysCalc [depreceted, C++] ['--mur=0.5,1,2', '--muf=0.5,1,2', '--pdf=errorset', '--alps=0.5,1,2'] = systematics_arguments ! 
see: https://cp3.irmp.ucl.ac.be/projects/madgraph/wiki/Systematics#Systematicspythonmodule + #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ diff --git a/epochX/cudacpp/pp_tt012j.mad/Cards/run_card_default.dat b/epochX/cudacpp/pp_tt012j.mad/Cards/run_card_default.dat index 7294e35706..38810a6b83 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Cards/run_card_default.dat +++ b/epochX/cudacpp/pp_tt012j.mad/Cards/run_card_default.dat @@ -203,6 +203,7 @@ systematics = systematics_program ! none, systematics [python], SysCalc [depreceted, C++] ['--mur=0.5,1,2', '--muf=0.5,1,2', '--pdf=errorset', '--alps=0.5,1,2'] = systematics_arguments ! see: https://cp3.irmp.ucl.ac.be/projects/madgraph/wiki/Systematics#Systematicspythonmodule + #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ diff --git a/epochX/cudacpp/pp_tt012j.mad/MGMEVersion.txt b/epochX/cudacpp/pp_tt012j.mad/MGMEVersion.txt index 9d3a5c0ba0..a806d3938c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/MGMEVersion.txt +++ b/epochX/cudacpp/pp_tt012j.mad/MGMEVersion.txt @@ -1 +1 @@ -3.5.3_lo_vect \ No newline at end of file +3.6.0_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/Source/DHELAS/aloha_functions.f b/epochX/cudacpp/pp_tt012j.mad/Source/DHELAS/aloha_functions.f index 975725737f..47699fa614 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Source/DHELAS/aloha_functions.f +++ b/epochX/cudacpp/pp_tt012j.mad/Source/DHELAS/aloha_functions.f @@ -351,7 +351,7 @@ subroutine oxxxxx(p,fmass,nhel,nsf , fo) fo(6) = ip * sqm(abs(im)) else - pp = min(p(0),dsqrt(p(1)**2+p(2)**2+p(3)**2)) +c pp = min(p(0),dsqrt(p(1)**2+p(2)**2+p(3)**2)) sf(1) = dble(1+nsf+(1-nsf)*nh)*rHalf sf(2) = dble(1+nsf-(1-nsf)*nh)*rHalf omega(1) = dsqrt(p(0)+pp) @@ -2054,7 +2054,7 @@ subroutine CombineAmp(nb, ihels, iwfcts, W1, Wall, Amp) enddo return end - + subroutine CombineAmpS(nb, ihels, iwfcts, W1, Wall, Amp) integer nb ! size of the vectors diff --git a/epochX/cudacpp/pp_tt012j.mad/Source/MODEL/couplings.f b/epochX/cudacpp/pp_tt012j.mad/Source/MODEL/couplings.f index f3b620ab58..04d6bb5333 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Source/MODEL/couplings.f +++ b/epochX/cudacpp/pp_tt012j.mad/Source/MODEL/couplings.f @@ -10,18 +10,27 @@ SUBROUTINE COUP() PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + LOGICAL UPDATELOOP COMMON /TO_UPDATELOOP/UPDATELOOP INCLUDE 'input.inc' - INCLUDE '../vector.inc' + + INCLUDE 'coupl.inc' READLHA = .TRUE. 
INCLUDE 'intparam_definition.inc' CALL COUP1() + IF (UPDATELOOP) THEN + + CALL COUP2() + + ENDIF + C couplings needed to be evaluated points by points C - CALL COUP2(1) + CALL COUP3(1) RETURN END @@ -47,7 +56,11 @@ SUBROUTINE UPDATE_AS_PARAM(VECID) INCLUDE '../maxparticles.inc' INCLUDE '../cuts.inc' + + INCLUDE '../vector.inc' + + INCLUDE '../run.inc' DOUBLE PRECISION ALPHAS @@ -65,7 +78,7 @@ SUBROUTINE UPDATE_AS_PARAM(VECID) couplings needed to be evaluated points by points C ALL_G(VECID) = G - CALL COUP2(VECID) + CALL COUP3(VECID) RETURN END @@ -80,7 +93,9 @@ SUBROUTINE UPDATE_AS_PARAM2(MU_R2,AS2 ,VECID) INTEGER VECID INCLUDE 'model_functions.inc' INCLUDE 'input.inc' + INCLUDE '../vector.inc' + INCLUDE 'coupl.inc' DOUBLE PRECISION MODEL_SCALE COMMON /MODEL_SCALE/MODEL_SCALE diff --git a/epochX/cudacpp/pp_tt012j.mad/Source/MODEL/couplings1.f b/epochX/cudacpp/pp_tt012j.mad/Source/MODEL/couplings1.f index e14f3a1770..72cfa0f6e4 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Source/MODEL/couplings1.f +++ b/epochX/cudacpp/pp_tt012j.mad/Source/MODEL/couplings1.f @@ -7,11 +7,12 @@ SUBROUTINE COUP1( ) IMPLICIT NONE INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' END diff --git a/epochX/cudacpp/pp_tt012j.mad/Source/MODEL/couplings2.f b/epochX/cudacpp/pp_tt012j.mad/Source/MODEL/couplings2.f index e638b28035..30f3a04e3b 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Source/MODEL/couplings2.f +++ b/epochX/cudacpp/pp_tt012j.mad/Source/MODEL/couplings2.f @@ -2,19 +2,17 @@ c written by the UFO converter ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc - SUBROUTINE COUP2( VECID) + SUBROUTINE COUP2( ) IMPLICIT NONE - INTEGER VECID + INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' - GC_10(VECID) = -G - GC_11(VECID) = MDL_COMPLEXI*G - GC_12(VECID) = MDL_COMPLEXI*MDL_G__EXP__2 END diff --git a/epochX/cudacpp/pp_tt012j.mad/Source/MODEL/couplings3.f b/epochX/cudacpp/pp_tt012j.mad/Source/MODEL/couplings3.f index f537dd3764..ad696f2865 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Source/MODEL/couplings3.f +++ b/epochX/cudacpp/pp_tt012j.mad/Source/MODEL/couplings3.f @@ -7,12 +7,13 @@ SUBROUTINE COUP3( VECID) IMPLICIT NONE INTEGER VECID INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' GC_10(VECID) = -G GC_11(VECID) = MDL_COMPLEXI*G diff --git a/epochX/cudacpp/pp_tt012j.mad/Source/MODEL/makefile b/epochX/cudacpp/pp_tt012j.mad/Source/MODEL/makefile index 0b24445a53..5a5681d220 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Source/MODEL/makefile +++ b/epochX/cudacpp/pp_tt012j.mad/Source/MODEL/makefile @@ -3,7 +3,7 @@ # Makefile for model library # # ---------------------------------------------------------------------------- - +# template models/template_files/makefile_madevent # Check for ../make_opts ifeq ($(wildcard ../make_opts), ../make_opts) include ../make_opts diff --git a/epochX/cudacpp/pp_tt012j.mad/Source/MODEL/makeinc.inc b/epochX/cudacpp/pp_tt012j.mad/Source/MODEL/makeinc.inc index 6e2743eac1..4a9e1b2cc5 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Source/MODEL/makeinc.inc +++ b/epochX/cudacpp/pp_tt012j.mad/Source/MODEL/makeinc.inc 
@@ -2,4 +2,4 @@ # written by the UFO converter ############################################################################# -MODEL = couplings.o lha_read.o printout.o rw_para.o model_functions.o couplings1.o couplings2.o \ No newline at end of file +MODEL = couplings.o lha_read.o printout.o rw_para.o model_functions.o couplings1.o couplings2.o couplings3.o \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/Source/MODEL/printout.f b/epochX/cudacpp/pp_tt012j.mad/Source/MODEL/printout.f index 18b8f35b08..3e195b03a2 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Source/MODEL/printout.f +++ b/epochX/cudacpp/pp_tt012j.mad/Source/MODEL/printout.f @@ -1,3 +1,7 @@ +ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc +c written by the UFO converter +ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc + c************************************************************************ c** ** c** MadGraph/MadEvent Interface to FeynRules ** @@ -9,7 +13,7 @@ subroutine printout implicit none - include '../vector.inc' ! defines VECSIZE_MEMMAX + include '../vector.inc' ! VECSIZE_MEMMAX (needed by coupl.inc) include 'coupl.inc' ! needs VECSIZE_MEMMAX (defined in vector.inc) include 'input.inc' diff --git a/epochX/cudacpp/pp_tt012j.mad/Source/PDF/eepdf.inc b/epochX/cudacpp/pp_tt012j.mad/Source/PDF/eepdf.inc index a0183e49ee..d50d8c62b3 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Source/PDF/eepdf.inc +++ b/epochX/cudacpp/pp_tt012j.mad/Source/PDF/eepdf.inc @@ -4,6 +4,9 @@ integer n_ee parameter (n_ee = 4) ! arrays to store the components before combining them + ! note this common is very quickly overwritten do not use + ! use such common outside of auto_dsig.f double precision ee_components(n_ee) common / to_ee_components / ee_components + diff --git a/epochX/cudacpp/pp_tt012j.mad/Source/PDF/pdfwrap_emela.f b/epochX/cudacpp/pp_tt012j.mad/Source/PDF/pdfwrap_emela.f index bce10819d5..da1e4e2276 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Source/PDF/pdfwrap_emela.f +++ b/epochX/cudacpp/pp_tt012j.mad/Source/PDF/pdfwrap_emela.f @@ -13,7 +13,7 @@ SUBROUTINE PDFWRAP DOUBLE PRECISION VALUE(20) REAL*8 ALPHASPDF EXTERNAL ALPHASPDF - ! PDFs with beamstrahlung use specific initialisation/evaluation +C PDFs with beamstrahlung use specific initialisation/evaluation LOGICAL HAS_BSTRAHL COMMON /TO_HAS_BS/ HAS_BSTRAHL diff --git a/epochX/cudacpp/pp_tt012j.mad/Source/PDF/pdg2pdf.f b/epochX/cudacpp/pp_tt012j.mad/Source/PDF/pdg2pdf.f index 46f321e66b..2e7343a69b 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Source/PDF/pdg2pdf.f +++ b/epochX/cudacpp/pp_tt012j.mad/Source/PDF/pdg2pdf.f @@ -108,7 +108,7 @@ double precision function pdg2pdf(ih,ipdg,beamid,x,xmu) else if (abs(ipart).eq.10) then ipart = sign(1,ipart) * 15 endif - pdg2pdf = 0d0 + pdg2pdf = 0d0 if (beamid.lt.0) then ih_local = ipart @@ -122,7 +122,7 @@ double precision function pdg2pdf(ih,ipdg,beamid,x,xmu) endif do i_ee = 1, n_ee ee_components(i_ee) = compute_eepdf(x,omx_ee(iabs(beamid)),xmu,i_ee,ipart,ih_local) - enddo + enddo pdg2pdf = ee_components(1) ! 
temporary to test pdf load c write(*,*), x, beamid ,omx_ee(iabs(beamid)),xmu,1,ipart,ih_local,pdg2pdf return diff --git a/epochX/cudacpp/pp_tt012j.mad/Source/dsample.f b/epochX/cudacpp/pp_tt012j.mad/Source/dsample.f index 09d482b45a..b0bc0547ad 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Source/dsample.f +++ b/epochX/cudacpp/pp_tt012j.mad/Source/dsample.f @@ -204,6 +204,9 @@ subroutine sample_full(ndim,ncall,itmax,itmin,dsig,ninvar,nconfigs,VECSIZE_USED) if (VECSIZE_USED.le.1) then all_fx(1) = dsig(all_p, all_wgt,0) + ivec=0 + ilock=0 + iwarp=1 else c Here "i" is the position in the full grid of the event do i=(iwarp-1)*WARP_SIZE+1, iwarp*warp_size diff --git a/epochX/cudacpp/pp_tt012j.mad/Source/eepdf.inc b/epochX/cudacpp/pp_tt012j.mad/Source/eepdf.inc index a0183e49ee..d50d8c62b3 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Source/eepdf.inc +++ b/epochX/cudacpp/pp_tt012j.mad/Source/eepdf.inc @@ -4,6 +4,9 @@ integer n_ee parameter (n_ee = 4) ! arrays to store the components before combining them + ! note this common is very quickly overwritten do not use + ! use such common outside of auto_dsig.f double precision ee_components(n_ee) common / to_ee_components / ee_components + diff --git a/epochX/cudacpp/pp_tt012j.mad/Source/genps.inc b/epochX/cudacpp/pp_tt012j.mad/Source/genps.inc index af7e0efbce..ef78e7e812 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Source/genps.inc +++ b/epochX/cudacpp/pp_tt012j.mad/Source/genps.inc @@ -29,9 +29,8 @@ c parameter (max_host=99,maxplace=199,maxpoints=100,maxans=50) c************************************************************************* c Parameters for helicity sums in matrixN.f c************************************************************************* - REAL*8 LIMHEL -c PARAMETER(LIMHEL=1e-8) ! ME threshold for helicity filtering (Fortran default) - PARAMETER(LIMHEL=0) ! ME threshold for helicity filtering (force Fortran to mimic cudacpp, see #419) +c REAL*8 LIMHEL +c PARAMETER(LIMHEL=1e-8) -> pass in the run_card.dat INTEGER MAXTRIES PARAMETER(MAXTRIES=25) C To pass the helicity configuration chosen by the DiscreteSampler to diff --git a/epochX/cudacpp/pp_tt012j.mad/Source/run.inc b/epochX/cudacpp/pp_tt012j.mad/Source/run.inc index 5433a23583..81c8df6bf2 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Source/run.inc +++ b/epochX/cudacpp/pp_tt012j.mad/Source/run.inc @@ -106,4 +106,7 @@ c double precision tmin_for_channel integer sde_strat ! 
1 means standard single diagram enhancement strategy, c 2 means approximation by the denominator of the propagator - common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat \ No newline at end of file + common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat +c + double precision limhel + common/to_limhel/limhel diff --git a/epochX/cudacpp/pp_tt012j.mad/Source/run_card.inc b/epochX/cudacpp/pp_tt012j.mad/Source/run_card.inc index 22d8b7aaa9..2588190439 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Source/run_card.inc +++ b/epochX/cudacpp/pp_tt012j.mad/Source/run_card.inc @@ -88,6 +88,8 @@ DSQRT_SHAT = 0.000000000000000D+00 + LIMHEL = 0.000000000000000D+00 + PTJ = 2.000000000000000D+01 PTB = 0.000000000000000D+00 diff --git a/epochX/cudacpp/pp_tt012j.mad/Source/setrun.f b/epochX/cudacpp/pp_tt012j.mad/Source/setrun.f index 9e9ef7fdbd..dc6caef748 100644 --- a/epochX/cudacpp/pp_tt012j.mad/Source/setrun.f +++ b/epochX/cudacpp/pp_tt012j.mad/Source/setrun.f @@ -162,9 +162,21 @@ subroutine setrun C Fill common block for Les Houches init info do i=1,2 if(lpp(i).eq.1.or.lpp(i).eq.2) then - idbmup(i)=2212 + if (nb_proton(i).eq.1.and.nb_neutron(i).eq.0) then + idbmup(i)=2212 + elseif (nb_proton(i).eq.0.and.nb_neutron(i).eq.1) then + idbmup(i)=2112 + else + idbmup(i) = 1000000000 + (nb_proton(i)+nb_neutron(i))*10 + $ + nb_proton(i)*10000 + endif elseif(lpp(i).eq.-1.or.lpp(i).eq.-2) then - idbmup(i)=-2212 + if (nb_proton(i).eq.1.and.nb_neutron(i).eq.0) then + idbmup(i)=-2212 + else + idbmup(i) = -1*(1000000000 + (nb_proton(i)+nb_neutron(i))*10 + $ + nb_proton(i)*10000) + endif elseif(lpp(i).eq.3) then idbmup(i)=11 elseif(lpp(i).eq.-3) then diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MGVersion.txt b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MGVersion.txt index 9d3a5c0ba0..a806d3938c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MGVersion.txt +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/MGVersion.txt @@ -1 +1 @@ -3.5.3_lo_vect \ No newline at end of file +3.6.0_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc index 20160c38ee..e0cab101a6 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.h index 33c7f752b7..a0af4854a8 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. 
//========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig.f index f50b3b61f3..b0fe6f24c2 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1134,11 +1134,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=16) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1148,32 +1149,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=16) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=16) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f index 39833c0293..2a748f3f03 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -101,6 +101,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -217,7 +219,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -281,7 +283,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -320,9 +322,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -338,6 +341,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -485,11 +490,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -589,9 +589,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -704,3 +706,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/driver.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/driver.f index 27a6e46742..ec5722702a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/driver.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f index 528cf15af0..2dfc78bb40 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_gg_ttx/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -23,6 +23,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=16) INTEGER NGRAPHS @@ -46,8 +48,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -56,26 +58,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -86,7 +85,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -96,26 +94,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,4) /-1,-1,-1, 1/ DATA (NHEL(I, 2),I=1,4) /-1,-1,-1,-1/ DATA (NHEL(I, 3),I=1,4) /-1,-1, 1, 1/ @@ -143,8 +140,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -153,11 +149,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=2 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=2 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -167,12 +163,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) @@ -184,7 +179,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -213,35 +209,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -313,7 +307,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc index 9ee164638a..9488909522 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.h index a9806ed2e5..b4f44d05d5 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig.f index e18cee3ba8..59c357609b 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1137,11 +1137,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=16) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1151,32 +1152,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=16) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=16) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f index 66c3732b56..3896571fc6 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -104,6 +104,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -235,7 +237,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -304,7 +306,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! 
random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -343,9 +345,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -363,6 +366,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -540,11 +545,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -644,9 +644,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -759,3 +761,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/driver.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/driver.f index 27a6e46742..ec5722702a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/driver.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
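c (editor's note, not part of the generated patch): together with the
c ickkw=0 setting in the main program above, freezing the scales here
c makes the zero-helicity scan (init_mode=.true.) run with one fixed
c mu_R/mu_F choice, plausibly so that the set of vanishing helicities
c cannot depend on an event-by-event dynamic scale. A sketch of the
c combined guard, using only names visible in this patch:
c
c      if (init_mode) then
c         fixed_ren_scale  = .true.   ! freeze mu_R
c         fixed_fac_scale1 = .true.   ! freeze mu_F, beam 1
c         fixed_fac_scale2 = .true.   ! freeze mu_F, beam 2
c         ickkw = 0                   ! switch off matching
c      endif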
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/matrix1.f index e25448c00d..0025b83810 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P0_uux_ttx/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -26,6 +26,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=16) INTEGER NGRAPHS @@ -49,8 +51,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -59,26 +61,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -89,7 +88,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -99,26 +97,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,4) / 1,-1,-1, 1/ DATA (NHEL(I, 2),I=1,4) / 1,-1,-1,-1/ DATA (NHEL(I, 3),I=1,4) / 1,-1, 1, 1/ @@ -146,8 +143,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -156,11 +152,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=2 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=2 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -170,12 +166,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) @@ -187,7 +182,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -216,35 +212,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
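C (editor's note, not part of the generated patch): the criterion
C just above keeps helicity I when |TS(I)| > ANS*LIMHEL/NCOMB,
C i.e. when that helicity contributes more than the fraction
C LIMHEL of the average per-helicity weight ANS/NCOMB. With the
C mirror bookkeeping dropped, a single NGOOD counter and column 1
C of GOODHEL replace the former per-IMIRROR arrays, and the
C THIS_NTRY/IGOOD/JHEL scaffolding disappears altogether.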
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -316,7 +310,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc index 6a72bd0004..c117c80635 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.h index 4cf0de0da5..1336561d98 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f index e443f9d74a..18102513a2 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1134,11 +1134,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=32) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1148,32 +1149,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=32) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=32) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f index c9392f3f6a..feae52d0d1 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -101,6 +101,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -217,7 +219,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -281,7 +283,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! 
random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -320,9 +322,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -338,6 +341,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -485,11 +490,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -589,9 +589,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -720,3 +722,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/driver.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/driver.f index c45686a3b2..c2eadb2c31 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/driver.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
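c (editor's note, not part of the generated patch): the auto_dsig1.f
c hunks above also add an explicit reset of the per-event selection
c buffers at the top of both DSIG1 and DSIG1_VEC,
c
c      SELECTED_HEL(:) = 0
c      SELECTED_COL(:) = 0
c
c presumably so that a helicity or colour choice from a previous call
c cannot leak into an event for which no new selection is made.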
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f index a8c7e012bf..80c1d61cc8 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gg_ttxg/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -23,6 +23,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=32) INTEGER NGRAPHS @@ -46,8 +48,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -56,26 +58,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -86,7 +85,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -96,26 +94,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,5) /-1,-1,-1, 1,-1/ DATA (NHEL(I, 2),I=1,5) /-1,-1,-1, 1, 1/ DATA (NHEL(I, 3),I=1,5) /-1,-1,-1,-1,-1/ @@ -159,8 +156,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -169,11 +165,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=6 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=6 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -183,12 +179,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) @@ -200,7 +195,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -229,35 +225,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -329,7 +323,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc index 0f0c8e12ab..7efa2ddf59 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.h index 5200762ff8..9ee06a68ef 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f index 3a34c54b34..eb78c27eb4 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1137,11 +1137,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=32) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1151,32 +1152,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=32) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=32) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f index 5f8fcf53e4..d06b3bf8c7 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -104,6 +104,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -232,7 +234,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -300,7 +302,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! 
random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -339,9 +341,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -358,6 +361,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -529,11 +534,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -633,9 +633,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -764,3 +766,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/driver.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/driver.f index c45686a3b2..c2eadb2c31 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/driver.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
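c (editor's note, not part of the generated patch): in the same
c auto_dsig1.f files the hand-written common block
c
c      CHARACTER*7 PDLABEL,EPA_LABEL
c      INTEGER LHAID
c      COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL
c
c is replaced by INCLUDE '../../Source/PDF/pdf.inc', so the block is
c defined in exactly one place; the old declaration is kept as
c comments rather than deleted.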
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f index 3af79030d2..3b108ad83f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gu_ttxu/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -26,6 +26,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=32) INTEGER NGRAPHS @@ -49,8 +51,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -59,26 +61,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -89,7 +88,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -99,26 +97,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,5) /-1, 1,-1, 1,-1/ DATA (NHEL(I, 2),I=1,5) /-1, 1,-1, 1, 1/ DATA (NHEL(I, 3),I=1,5) /-1, 1,-1,-1,-1/ @@ -162,8 +159,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -172,11 +168,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=4 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=4 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -186,12 +182,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) @@ -203,7 +198,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -232,35 +228,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -332,7 +326,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc index 8d157bfe8c..4146aaf18c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.h index 99ec68e177..01422c303b 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f index 995579a722..eb8a8e76e4 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1137,11 +1137,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=32) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1151,32 +1152,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=32) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=32) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f index 27ef220e77..d02ea2f3b3 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -104,6 +104,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -232,7 +234,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -300,7 +302,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! 
random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -339,9 +341,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -358,6 +361,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -529,11 +534,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -633,9 +633,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -764,3 +766,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/driver.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/driver.f index c45686a3b2..c2eadb2c31 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/driver.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
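c (editor's note, not part of the generated patch): each matrix1.f
c also moves the colour-flow reset out of the IF(MULTI_CHANNEL)
c guard, so JAMP2 is zeroed on every call and not only when
c multi-channel sampling is active:
c
c      JAMP2(0)=4                ! number of colour flows; process
c      DO I=1,INT(JAMP2(0))      ! dependent (2, 4 or 6 above)
c         JAMP2(I)=0D0
c      ENDDO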
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f index 1ab08162f9..1ebce3e45d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_gux_ttxux/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -26,6 +26,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=32) INTEGER NGRAPHS @@ -49,8 +51,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -59,26 +61,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -89,7 +88,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -99,26 +97,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,5) /-1,-1,-1, 1, 1/ DATA (NHEL(I, 2),I=1,5) /-1,-1,-1, 1,-1/ DATA (NHEL(I, 3),I=1,5) /-1,-1,-1,-1, 1/ @@ -162,8 +159,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -172,11 +168,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=4 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=4 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -186,12 +182,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) @@ -203,7 +198,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -232,35 +228,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -332,7 +326,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc index e5d9023f43..225c8ca5c9 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.h index 0f582e7533..f8f6c51172 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig.f index 4481151b60..508328291b 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1137,11 +1137,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=32) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1151,32 +1152,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=32) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=32) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f index 6dfd0af10a..1353e82a42 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -104,6 +104,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -235,7 +237,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -304,7 +306,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! 
random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -343,9 +345,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -363,6 +366,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -540,11 +545,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -644,9 +644,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -775,3 +777,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/driver.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/driver.f index c45686a3b2..c2eadb2c31 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/driver.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
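The driver.f change just above switches every scale to a fixed value (and disables CKKW matching via ickkw=0) whenever init_mode is active, so the zero-helicity scan evaluates all phase-space points with one and the same scale choice. A minimal standalone sketch of that control flow follows; it uses plain local variables where the real code picks these flags up from run.inc, so treat it as an illustration rather than the generated source.

      PROGRAM INIT_SCALES
      IMPLICIT NONE
      LOGICAL INIT_MODE
      LOGICAL FIXED_REN_SCALE, FIXED_FAC_SCALE1, FIXED_FAC_SCALE2
      INTEGER ICKKW
C Mimic the zero-helicity pass: get_user_params sets init_mode=.true.
      INIT_MODE = .TRUE.
      FIXED_REN_SCALE = .FALSE.
      FIXED_FAC_SCALE1 = .FALSE.
      FIXED_FAC_SCALE2 = .FALSE.
      ICKKW = 1
      IF (INIT_MODE) THEN
C Freeze all scales and switch off matching for the scan
         FIXED_REN_SCALE = .TRUE.
         FIXED_FAC_SCALE1 = .TRUE.
         FIXED_FAC_SCALE2 = .TRUE.
         ICKKW = 0
      ENDIF
      WRITE(*,*) 'fixed?', FIXED_REN_SCALE, FIXED_FAC_SCALE1
      WRITE(*,*) 'fixed2, ickkw:', FIXED_FAC_SCALE2, ICKKW
      END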
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/matrix1.f index f617e77d4c..cbe5075ab3 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P1_uux_ttxg/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -26,6 +26,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=32) INTEGER NGRAPHS @@ -49,8 +51,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -59,26 +61,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -89,7 +88,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -99,26 +97,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,5) / 1,-1,-1, 1,-1/ DATA (NHEL(I, 2),I=1,5) / 1,-1,-1, 1, 1/ DATA (NHEL(I, 3),I=1,5) / 1,-1,-1,-1,-1/ @@ -162,8 +159,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -172,11 +168,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=4 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=4 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -186,12 +182,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) @@ -203,7 +198,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -232,35 +228,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -332,7 +326,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc index 5ab6d2dc1f..0b331fd15a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.h index aca257e0c2..b72a5017b0 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig.f index 5462c5b2f7..48483f3ccd 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1134,11 +1134,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1148,32 +1149,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f index 18dcab4951..36aca3d359 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -101,6 +101,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -217,7 +219,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -281,7 +283,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! 
random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -320,9 +322,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -338,6 +341,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -485,11 +490,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -589,9 +589,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -752,3 +754,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/driver.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/driver.f index 526cc3b0ae..f7f23196eb 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/driver.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
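Across all these subprocess directories the good-helicity bookkeeping loses its hard-coded second dimension of 2 (the old per-mirror slots): GOODHEL and NTRY are now sized by MAXSPROC from maxamps.inc, and the element-by-element initialization loops become whole-array assignments. A minimal sketch of the new pattern, with illustrative parameter values standing in for the real include files:

      PROGRAM GOODHEL_ARRAYS
      IMPLICIT NONE
C NCOMB, MAXSPROC and MAXTRIES are stand-ins for the include files
      INTEGER NCOMB, MAXSPROC, MAXTRIES
      PARAMETER (NCOMB=64, MAXSPROC=1, MAXTRIES=25)
      LOGICAL GOODHEL(NCOMB, MAXSPROC)
      INTEGER NTRY(MAXSPROC)
C As in INIT_GOOD_HEL: whole-array assignments replace the DO loops
      GOODHEL(:,:) = .FALSE.
      NTRY(:) = 0
C As in READ_GOOD_HEL: mark every subprocess as past its tuning phase
      NTRY(:) = MAXTRIES + 1
      WRITE(*,*) 'NTRY =', NTRY
      END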
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f index 2af276483a..11d3797b55 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxgg/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -23,6 +23,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=64) INTEGER NGRAPHS @@ -46,8 +48,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -56,26 +58,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -86,7 +85,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -96,26 +94,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,6) /-1,-1,-1, 1,-1,-1/ DATA (NHEL(I, 2),I=1,6) /-1,-1,-1, 1,-1, 1/ DATA (NHEL(I, 3),I=1,6) /-1,-1,-1, 1, 1,-1/ @@ -191,8 +188,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -201,11 +197,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=24 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=24 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -215,12 +211,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) @@ -232,7 +227,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -261,35 +257,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -361,7 +355,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc index fc7bcb9128..4f6d700ede 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.h index 90000b218a..8ba1d7555c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig.f index 19425434f5..4466e3b318 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1137,11 +1137,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1151,32 +1152,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f index f50acf4e1e..2bb68d8cc5 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -104,6 +104,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -229,7 +231,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -296,7 +298,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! 
random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -335,9 +337,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -353,6 +356,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -518,11 +523,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -622,9 +622,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -785,3 +787,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/driver.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/driver.f index 526cc3b0ae..f7f23196eb 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/driver.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
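The rewritten filter inside SMATRIX1, visible in the hunks above, drops the per-mirror counters (NGOOD(2), IGOOD, JHEL, THIS_NTRY) but keeps the selection criterion itself: helicity combination I is recorded as good once |TS(I)| exceeds ANS*LIMHEL/NCOMB, i.e. once it contributes more than the LIMHEL fraction of the mean per-helicity weight. A standalone sketch of that criterion, with made-up TS values and an assumed LIMHEL (the real one comes from the run card):

      PROGRAM HEL_FILTER
      IMPLICIT NONE
      INTEGER NCOMB, I, NGOOD
      PARAMETER (NCOMB=4)
      DOUBLE PRECISION TS(NCOMB), ANS, LIMHEL
      LOGICAL GOODHEL(NCOMB)
C Made-up per-helicity weights for illustration only
      DATA TS /0.9D0, 0.1D0, 1.0D-12, 0.0D0/
      DATA LIMHEL /1.0D-8/
      ANS = 0D0
      DO I=1,NCOMB
         ANS = ANS + TS(I)
      ENDDO
      NGOOD = 0
      DO I=1,NCOMB
C A helicity is good once it beats LIMHEL times the average weight
         GOODHEL(I) = DABS(TS(I)).GT.ANS*LIMHEL/NCOMB
         IF (GOODHEL(I)) THEN
            NGOOD = NGOOD + 1
            PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS
         ENDIF
      ENDDO
      WRITE(*,*) 'NGOODHEL =', NGOOD
      WRITE(*,*) 'NCOMB =', NCOMB
      END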
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f index 14442a3c07..4a8b96c863 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gg_ttxuux/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -26,6 +26,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=64) INTEGER NGRAPHS @@ -49,8 +51,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -59,26 +61,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -89,7 +88,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -99,26 +97,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,6) /-1,-1,-1, 1,-1, 1/ DATA (NHEL(I, 2),I=1,6) /-1,-1,-1, 1,-1,-1/ DATA (NHEL(I, 3),I=1,6) /-1,-1,-1, 1, 1, 1/ @@ -194,8 +191,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -204,11 +200,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=12 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=12 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -218,12 +214,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) @@ -235,7 +230,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -264,35 +260,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -364,7 +358,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc index f0abafb826..fe1473ec16 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.h index 59d48b3183..50c6185fb2 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig.f index 86518a03ce..05928f949e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1137,11 +1137,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1151,32 +1152,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f index 01d90ced46..562099653e 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -104,6 +104,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -232,7 +234,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -300,7 +302,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! 
random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -339,9 +341,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -358,6 +361,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -529,11 +534,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -633,9 +633,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -796,3 +798,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/driver.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/driver.f index 526cc3b0ae..f7f23196eb 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/driver.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
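Another change repeated in every auto_dsig1.f above: DSIG1 and DSIG1_VEC now zero SELECTED_HEL and SELECTED_COL as their first executable statements, presumably so that events abandoned before a helicity or colour is actually drawn cannot carry stale selections into the output. A minimal sketch, with an illustrative VECSIZE_MEMMAX replacing the value defined in Source/vector.inc:

      PROGRAM SEL_INIT
      IMPLICIT NONE
C Illustrative stand-in for VECSIZE_MEMMAX from Source/vector.inc
      INTEGER VECSIZE_MEMMAX
      PARAMETER (VECSIZE_MEMMAX=16)
      INTEGER SELECTED_HEL(VECSIZE_MEMMAX)
      INTEGER SELECTED_COL(VECSIZE_MEMMAX)
C Clear both selection buffers before any event is processed
      SELECTED_HEL(:) = 0
      SELECTED_COL(:) = 0
      WRITE(*,*) 'SELECTED_HEL(1) =', SELECTED_HEL(1)
      WRITE(*,*) 'SELECTED_COL(1) =', SELECTED_COL(1)
      END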
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f index cd276a4930..dfc65978b0 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gu_ttxgu/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -26,6 +26,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=64) INTEGER NGRAPHS @@ -49,8 +51,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -59,26 +61,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -89,7 +88,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -99,26 +97,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,6) /-1, 1,-1, 1,-1,-1/ DATA (NHEL(I, 2),I=1,6) /-1, 1,-1, 1,-1, 1/ DATA (NHEL(I, 3),I=1,6) /-1, 1,-1, 1, 1,-1/ @@ -194,8 +191,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -204,11 +200,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=12 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=12 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -218,12 +214,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) @@ -235,7 +230,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -264,35 +260,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -364,7 +358,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc index 78a198f0ad..9bbe46ac79 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.h index 8b2f5cd09c..bbcbd44a5f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig.f index 8a83e54567..a4d734ab51 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1137,11 +1137,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1151,32 +1152,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f index 1772ea07ea..bdf370f218 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -104,6 +104,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -232,7 +234,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -300,7 +302,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! 
random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -339,9 +341,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -358,6 +361,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -529,11 +534,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -633,9 +633,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -796,3 +798,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/driver.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/driver.f index 526cc3b0ae..f7f23196eb 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/driver.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
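C --- editor's note: the three assignments above (and ickkw=0 in the
C     earlier driver hunk) freeze the renormalisation and factorisation
C     scales while init_mode surveys for zero helicities, presumably so
C     the probe matrix elements do not depend on event-by-event scale
C     choices. A minimal sketch of the pattern; the flags mirror the
C     run.inc names but are plain dummy arguments here:
      SUBROUTINE SET_INIT_MODE_FLAGS(INIT_MODE, FIXED_REN_SCALE,
     $     FIXED_FAC_SCALE1, FIXED_FAC_SCALE2, ICKKW)
      IMPLICIT NONE
      LOGICAL INIT_MODE, FIXED_REN_SCALE
      LOGICAL FIXED_FAC_SCALE1, FIXED_FAC_SCALE2
      INTEGER ICKKW
      IF (INIT_MODE) THEN             ! zero-helicity survey only
        FIXED_REN_SCALE = .TRUE.      ! freeze muR
        FIXED_FAC_SCALE1 = .TRUE.     ! freeze muF for beam 1
        FIXED_FAC_SCALE2 = .TRUE.     ! freeze muF for beam 2
        ICKKW = 0                     ! no CKKW-style matching
      ENDIF
      END
C --- end editor's note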
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f index a6d3ffb1e8..f661174e8f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_gux_ttxgux/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -26,6 +26,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=64) INTEGER NGRAPHS @@ -49,8 +51,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -59,26 +61,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -89,7 +88,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -99,26 +97,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,6) /-1,-1,-1, 1,-1, 1/ DATA (NHEL(I, 2),I=1,6) /-1,-1,-1, 1,-1,-1/ DATA (NHEL(I, 3),I=1,6) /-1,-1,-1, 1, 1, 1/ @@ -194,8 +191,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -204,11 +200,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=12 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=12 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -218,12 +214,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) @@ -235,7 +230,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -264,35 +260,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
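C --- editor's note: in 3.6.0 the per-mirror bookkeeping of 3.5.3
C     (NGOOD(2), IGOOD(NCOMB,2), JHEL(2), THIS_NTRY(2)) collapses to a
C     single NGOOD counter, as in the surrounding hunk, and GOODHEL and
C     NTRY are dimensioned by MAXSPROC from maxamps.inc instead of a
C     hard-coded 2. Sketch of the new layout; the MAXSPROC value here
C     is illustrative:
      SUBROUTINE GOODHEL_LAYOUT_SKETCH
      IMPLICIT NONE
      INTEGER NCOMB, MAXSPROC
      PARAMETER ( NCOMB=64, MAXSPROC=1 )  ! maxamps.inc in real code
      LOGICAL GOODHEL(NCOMB, MAXSPROC)    ! was GOODHEL(NCOMB,2)
      INTEGER NTRY(MAXSPROC)              ! was NTRY(2)
      INTEGER NGOOD                       ! was NGOOD(2), one/mirror
      COMMON/BLOCK_GOODHEL/NTRY,GOODHEL   ! same common block name
      NGOOD = 0
      NTRY(1) = NTRY(1) + 1               ! mirror index now fixed to 1
      END
C --- end editor's note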
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -364,7 +358,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc index c4e155dcae..d7983b25cf 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.h index a2c6821084..19270b5496 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig.f index c4823d6e82..a1ec332bbc 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1139,11 +1139,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1153,32 +1154,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f index 43a225bd7b..f5a438d016 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -106,6 +106,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -241,7 +243,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -312,7 +314,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! 
random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -351,9 +353,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -371,6 +374,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -556,11 +561,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -660,9 +660,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -823,3 +825,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/driver.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/driver.f index 526cc3b0ae..f7f23196eb 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/driver.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
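C --- editor's note: INIT_GOOD_HEL and READ_GOOD_HEL (called from this
C     driver) now reset their tables with Fortran-90 whole-array
C     assignments instead of explicit DO loops. Self-contained sketch
C     of the equivalence; dimensions and MAXTRIES are illustrative:
      PROGRAM ARRAY_ASSIGN_SKETCH
      IMPLICIT NONE
      INTEGER NCOMB, MAXSPROC, MAXTRIES
      PARAMETER ( NCOMB=64, MAXSPROC=2, MAXTRIES=25 )
      LOGICAL GOODHEL(NCOMB, MAXSPROC)
      INTEGER NTRY(MAXSPROC), I, J
C     3.5.3 style: explicit loops over both mirrors
      DO J=1,MAXSPROC
        DO I=1,NCOMB
          GOODHEL(I,J) = .FALSE.
        ENDDO
        NTRY(J) = 0
      ENDDO
C     3.6.0 style: one statement per array, identical effect
      GOODHEL(:,:) = .FALSE.
      NTRY(:) = MAXTRIES + 1          ! as in READ_GOOD_HEL
      WRITE(6,*) 'NTRY =', NTRY
      END
C --- end editor's note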
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f index 8cd072202c..8e69b0f0ca 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uc_ttxuc/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -28,6 +28,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=64) INTEGER NGRAPHS @@ -51,8 +53,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -61,26 +63,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -91,7 +90,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -101,26 +99,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,6) / 1, 1,-1, 1,-1,-1/ DATA (NHEL(I, 2),I=1,6) / 1, 1,-1, 1,-1, 1/ DATA (NHEL(I, 3),I=1,6) / 1, 1,-1, 1, 1,-1/ @@ -196,8 +193,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -206,11 +202,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=6 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=6 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -220,12 +216,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) @@ -237,7 +232,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -266,35 +262,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
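C --- editor's note: in the hunk above, the one-time call to
C     RESET_CUMULATIVE_VARIABLE after the helicity survey is now also
C     guarded by DS_GET_DIM_STATUS('Helicity').NE.-1, i.e. it is
C     skipped when the DiscreteSampler has no 'Helicity' dimension
C     (-1 is read here as 'dimension absent'; this is an inference from
C     the surrounding code, not a documented contract). Sketch of the
C     guard with a stand-in status function:
      SUBROUTINE RESET_AFTER_SURVEY(NTRY1, MAXTRIES)
      IMPLICIT NONE
      INTEGER NTRY1, MAXTRIES
      INTEGER DS_STATUS_STUB
      EXTERNAL DS_STATUS_STUB
      IF (NTRY1.EQ.(MAXTRIES+1) .AND.
     $    DS_STATUS_STUB('Helicity').NE.-1) THEN
        WRITE(6,*) 'would call RESET_CUMULATIVE_VARIABLE()'
      ENDIF
      END

      INTEGER FUNCTION DS_STATUS_STUB(DIMNAME)
      IMPLICIT NONE
      CHARACTER*(*) DIMNAME
      DS_STATUS_STUB = -1             ! stub: pretend dim is absent
      END
C --- end editor's note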
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -366,7 +360,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc index e78aae3cdd..ec11bb2216 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.h index ff4da9ff1c..222259193b 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig.f index 8cf4f9e36a..a3de0328ec 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1145,11 +1145,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1159,32 +1160,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f index 6e6d4c2db1..c51f0fb863 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -112,6 +112,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -267,7 +269,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -344,7 +346,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! 
random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -383,9 +385,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -403,6 +406,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -628,11 +633,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -732,9 +732,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -895,3 +897,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/driver.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/driver.f index 526cc3b0ae..f7f23196eb 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/driver.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
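C --- editor's note: throughout these files the include of vector.inc
C     is moved ahead of run.inc (and the old include near the end of
C     the declarations is dropped). The likely reason, inferred from
C     the '! defines VECSIZE_MEMMAX' tag, is that run.inc declares
C     arrays dimensioned with VECSIZE_MEMMAX, so that parameter must
C     already be known. Order-dependence sketch (no include files;
C     two local parameters stand in for the two headers):
      SUBROUTINE INCLUDE_ORDER_SKETCH
      IMPLICIT NONE
C     stand-in for vector.inc: defines the vector size first
      INTEGER VECSIZE_MEMMAX
      PARAMETER ( VECSIZE_MEMMAX=32 )
C     stand-in for run.inc: uses it to dimension a per-event array
      DOUBLE PRECISION ALL_WGT(VECSIZE_MEMMAX)
      ALL_WGT(:) = 0.0D0              ! compiles only in this order
      END
C --- end editor's note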
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f index 302d4ac309..d8ca6d18c2 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_ucx_ttxucx/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -34,6 +34,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=64) INTEGER NGRAPHS @@ -57,8 +59,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -67,26 +69,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -97,7 +96,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -107,26 +105,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,6) / 1,-1,-1, 1,-1, 1/ DATA (NHEL(I, 2),I=1,6) / 1,-1,-1, 1,-1,-1/ DATA (NHEL(I, 3),I=1,6) / 1,-1,-1, 1, 1, 1/ @@ -202,8 +199,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -212,11 +208,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=6 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=6 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -226,12 +222,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) @@ -243,7 +238,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -272,35 +268,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
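C --- editor's note: in the hunk above the zeroing of the colour-flow
C     accumulators JAMP2 moves outside the IF(MULTI_CHANNEL) block, so
C     it now happens on every SMATRIX1 call; plausibly because JAMP2
C     feeds the colour selection even when multi-channelling is off
C     (an inference, not stated in the diff). Sketch of the reset,
C     with illustrative array sizes:
      SUBROUTINE RESET_JAMP2_SKETCH(MULTI_CHANNEL)
      IMPLICIT NONE
      LOGICAL MULTI_CHANNEL
      INTEGER MAXFLOW, NDIAGS, I
      PARAMETER ( MAXFLOW=6, NDIAGS=6 )
      DOUBLE PRECISION AMP2(NDIAGS), JAMP2(0:MAXFLOW)
      IF (MULTI_CHANNEL) THEN
        DO I=1,NDIAGS                 ! AMP2 still multi-channel only
          AMP2(I)=0D0
        ENDDO
      ENDIF
      JAMP2(0)=6                      ! JAMP2 reset is unconditional
      DO I=1,INT(JAMP2(0))
        JAMP2(I)=0D0
      ENDDO
      END
C --- end editor's note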
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -372,7 +366,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc index 901a41c0cf..31e63dc449 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.h index 97f0e941bb..da24a1c10d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig.f index 186b3a7c7d..a6a0d3cd09 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1137,11 +1137,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1151,32 +1152,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f index 3628224ed0..7d89233913 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -104,6 +104,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -235,7 +237,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -304,7 +306,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! 
random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -343,9 +345,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -363,6 +366,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -540,11 +545,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -644,9 +644,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -807,3 +809,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/driver.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/driver.f index 526cc3b0ae..f7f23196eb 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/driver.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
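C --- editor's note: DSIG1 and DSIG1_VEC in this file's auto_dsig1.f
C     hunks now zero the SELECTED_HEL / SELECTED_COL arrays on entry; a
C     plausible reading is that entries for events which end up
C     unprocessed must not carry stale helicity/colour choices from a
C     previous call (an inference from the diff, not a stated
C     rationale). Sketch; the dimensioning via VECSIZE_MEMMAX and the
C     '0 = no selection' convention are assumptions:
      SUBROUTINE SELECTION_RESET_SKETCH(VECSIZE_USED)
      IMPLICIT NONE
      INTEGER VECSIZE_MEMMAX
      PARAMETER ( VECSIZE_MEMMAX=32 )  ! vector.inc in the real code
      INTEGER SELECTED_HEL(VECSIZE_MEMMAX)
      INTEGER SELECTED_COL(VECSIZE_MEMMAX)
      INTEGER VECSIZE_USED, IVEC      ! assumes USED <= MEMMAX
      SELECTED_HEL(:) = 0             ! clear all slots first
      SELECTED_COL(:) = 0
      DO IVEC=1,VECSIZE_USED          ! only these get real choices
        SELECTED_HEL(IVEC) = 1        ! placeholder selection
        SELECTED_COL(IVEC) = 1
      ENDDO
      END
C --- end editor's note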
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f index 52c8aadf6c..64969ed611 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uu_ttxuu/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -26,6 +26,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=64) INTEGER NGRAPHS @@ -49,8 +51,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -59,26 +61,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -89,7 +88,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -99,26 +97,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,6) / 1, 1,-1, 1,-1,-1/ DATA (NHEL(I, 2),I=1,6) / 1, 1,-1, 1,-1, 1/ DATA (NHEL(I, 3),I=1,6) / 1, 1,-1, 1, 1,-1/ @@ -194,8 +191,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -204,11 +200,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=6 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=6 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -218,12 +214,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) @@ -235,7 +230,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -264,35 +260,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
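
[Editorial note] Across every matrix1.f in this patch the good-helicity bookkeeping loses its hardcoded mirror dimension: GOODHEL(NCOMB,2)/NTRY(2) become GOODHEL(NCOMB,MAXSPROC)/NTRY(MAXSPROC) with MAXSPROC from maxamps.inc, ISHEL/NGOOD/NGOODHEL collapse to scalars, and the helper routines switch to whole-array assignments. A self-contained sketch of the new layout, with placeholder values for the parameters the real code pulls from include files:

      PROGRAM GOODHEL_SKETCH
C     Sketch of the resized good-helicity bookkeeping. MAXSPROC
C     really comes from maxamps.inc, NCOMB from the process and
C     MAXTRIES from genps.inc; the values here are placeholders.
      IMPLICIT NONE
      INTEGER MAXSPROC,NCOMB,MAXTRIES
      PARAMETER (MAXSPROC=1,NCOMB=64,MAXTRIES=25)
      LOGICAL GOODHEL(NCOMB,MAXSPROC)
      INTEGER NTRY(MAXSPROC)
      COMMON/BLOCK_GOODHEL/NTRY,GOODHEL
C     INIT_GOOD_HEL: whole-array assignments replace the explicit
C     loops over the two former mirror slots.
      GOODHEL(:,:) = .FALSE.
      NTRY(:) = 0
C     READ_GOOD_HEL: mark the filtering as already done for all
C     subprocesses at once.
      NTRY(:) = MAXTRIES + 1
      WRITE(*,*) 'slots:',MAXSPROC,' helicities:',NCOMB
      END
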
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -364,7 +358,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc index 496c0c8b1d..a4e3a7bb49 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.h index 9533fc12fa..a710a7cfff 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig.f index ca6d2b083c..ff97f048f3 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1145,11 +1145,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1159,32 +1160,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f index dc917e5277..636ac38d51 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -112,6 +112,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -267,7 +269,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -344,7 +346,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! 
random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -383,9 +385,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -403,6 +406,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -628,11 +633,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -732,9 +732,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -895,3 +897,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/driver.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/driver.f index 526cc3b0ae..f7f23196eb 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/driver.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
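
[Editorial note] The SMATRIX1_MULTI workaround for mg5amc_test issue #22 (see PR #486) is commented out rather than removed in each of these files, so RESET_CUMULATIVE_VARIABLE is no longer invoked when FBRIDGE_MODE=1. For reference, the guard as it read while active, verbatim from the pre-image of this hunk:

      IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all)
        CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1
      ENDIF

The commented replacement re-wraps these long lines, splitting the inline comments across lines; that is harmless here since the whole block is now a comment.
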
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/matrix1.f index b20106a6da..00b7bed9bd 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxccx/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -34,6 +34,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=64) INTEGER NGRAPHS @@ -57,8 +59,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -67,26 +69,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -97,7 +96,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -107,26 +105,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,6) / 1,-1,-1, 1,-1, 1/ DATA (NHEL(I, 2),I=1,6) / 1,-1,-1, 1,-1,-1/ DATA (NHEL(I, 3),I=1,6) / 1,-1,-1, 1, 1, 1/ @@ -202,8 +199,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -212,11 +208,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=6 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=6 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -226,12 +222,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) @@ -243,7 +238,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -272,35 +268,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
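
[Editorial note] Another change repeated in each SMATRIX1: the JAMP2 reset moves out of the IF(MULTI_CHANNEL) block, so the colour-flow accumulators are cleared on every call while the AMP2 reset stays conditional. A compilable sketch of the new ordering (the dimensions are placeholders, the flow count 6 matches the hunks above, and the stated motivation, keeping downstream colour selection from reading stale flows, is an inference):

      PROGRAM JAMP2_SKETCH
C     Sketch of the hoisted JAMP2 reset; dimensions are
C     placeholders (the flow count 6 matches the hunks above).
      IMPLICIT NONE
      INTEGER NDIAGS,MAXFLOW,I
      PARAMETER (NDIAGS=14,MAXFLOW=6)
      LOGICAL MULTI_CHANNEL
      DOUBLE PRECISION AMP2(NDIAGS),JAMP2(0:MAXFLOW)
      MULTI_CHANNEL = .FALSE.
C     AMP2 is still cleared only in multi-channel mode ...
      IF (MULTI_CHANNEL) THEN
        DO I=1,NDIAGS
          AMP2(I)=0D0
        ENDDO
      ENDIF
C     ... but JAMP2 is now cleared unconditionally on every call.
      JAMP2(0)=6
      DO I=1,INT(JAMP2(0))
        JAMP2(I)=0D0
      ENDDO
      WRITE(*,*) 'flows zeroed:',INT(JAMP2(0))
      END
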
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -372,7 +366,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc index 8532f570e6..6143def4bf 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.h index 2325c57c00..e23dde0cb9 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig.f index ad1f4999af..c2ac14a8d0 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1137,11 +1137,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1151,32 +1152,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f index 1e7b114e37..dacc8482d6 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -104,6 +104,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -235,7 +237,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -304,7 +306,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! 
random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -343,9 +345,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -363,6 +366,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -540,11 +545,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -644,9 +644,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -807,3 +809,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/driver.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/driver.f index 526cc3b0ae..f7f23196eb 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/driver.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
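
[Editorial note] In each DSIG1_VEC the hand-written TO_PDF common block is replaced by INCLUDE '../../Source/PDF/pdf.inc', with the old declarations retained as comments. Keeping the declaration in a single include guards against layout drift between program units that share the common. A self-contained sketch of the pattern; the member list mirrors the commented-out block, and the assigned values are illustrative only:

      PROGRAM PDFINC_SKETCH
C     Both units below spell out the TO_PDF common; in the real
C     code this text lives once in pdf.inc and is INCLUDEd, so the
C     layout cannot drift between files. Values are illustrative.
      IMPLICIT NONE
      CHARACTER*7 PDLABEL,EPA_LABEL
      INTEGER LHAID
      COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL
      LHAID = 0
      PDLABEL = 'lhapdf'
      EPA_LABEL = 'none'
      CALL SHOW_PDF()
      END

      SUBROUTINE SHOW_PDF()
      IMPLICIT NONE
      CHARACTER*7 PDLABEL,EPA_LABEL
      INTEGER LHAID
      COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL
      WRITE(*,*) 'pdlabel=',PDLABEL,' lhaid=',LHAID
      END
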
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f index 7c94ecdda2..c9dcb1350f 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxgg/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -26,6 +26,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=64) INTEGER NGRAPHS @@ -49,8 +51,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -59,26 +61,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -89,7 +88,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -99,26 +97,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,6) / 1,-1,-1, 1,-1,-1/ DATA (NHEL(I, 2),I=1,6) / 1,-1,-1, 1,-1, 1/ DATA (NHEL(I, 3),I=1,6) / 1,-1,-1, 1, 1,-1/ @@ -194,8 +191,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -204,11 +200,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=12 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=12 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -218,12 +214,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) @@ -235,7 +230,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -264,35 +260,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
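
[Editorial note] The post-scan reset also gains a condition: RESET_CUMULATIVE_VARIABLE now fires only when DS_GET_DIM_STATUS('Helicity') is not -1. A sketch of the guarded call, under the assumption that -1 is the DiscreteSampler's "dimension not registered" status; DS_STATUS below is a local stub standing in for the real DS_GET_DIM_STATUS, and MAXTRIES is a placeholder for the genps.inc value:

      PROGRAM RESET_GUARD_SKETCH
C     Guarded reset after the helicity scan. DS_STATUS is a stub
C     for DS_GET_DIM_STATUS; -1 taken to mean 'dimension not
C     registered' (assumption). MAXTRIES placeholder (genps.inc).
      IMPLICIT NONE
      INTEGER MAXTRIES,NTRY1
      PARAMETER (MAXTRIES=25)
      INTEGER DS_STATUS
      EXTERNAL DS_STATUS
      NTRY1 = MAXTRIES + 1
      IF (NTRY1.EQ.(MAXTRIES+1) .AND.
     $    DS_STATUS('Helicity').NE.-1) THEN
        WRITE(*,*) 'would call RESET_CUMULATIVE_VARIABLE()'
      ENDIF
      END

      INTEGER FUNCTION DS_STATUS(DIM_NAME)
      IMPLICIT NONE
      CHARACTER*(*) DIM_NAME
      DS_STATUS = 0 ! pretend the dimension is registered
      END
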
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -364,7 +358,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc index 7135109e40..7bae595aca 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.h index 9d8ce6ea23..44c39a5bc8 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig.f index fd8be8f8af..0c4a9864c1 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1137,11 +1137,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1151,32 +1152,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f index 1608ea5ad1..663dc8ed89 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -104,6 +104,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -235,7 +237,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -304,7 +306,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! 
random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -343,9 +345,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -363,6 +366,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -540,11 +545,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -644,9 +644,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -807,3 +809,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/driver.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/driver.f index 526cc3b0ae..f7f23196eb 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/driver.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
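
[Editorial note] Every DSIG1 and DSIG1_VEC now begins by clearing SELECTED_HEL and SELECTED_COL before any branch can return early, so callers cannot pick up selections left over from a previous call (that reading of the intent is an inference). A minimal sketch; VECSIZE_MEMMAX is a placeholder here, the real value comes from vector.inc:

      PROGRAM SELECTED_INIT_SKETCH
C     Mirrors the new first executable statements of DSIG1 and
C     DSIG1_VEC. VECSIZE_MEMMAX is a placeholder here; the real
C     value comes from vector.inc.
      IMPLICIT NONE
      INTEGER VECSIZE_MEMMAX
      PARAMETER (VECSIZE_MEMMAX=32)
      INTEGER SELECTED_HEL(VECSIZE_MEMMAX)
      INTEGER SELECTED_COL(VECSIZE_MEMMAX)
C     Zero the per-event selections up front so stale entries
C     cannot survive an early return.
      SELECTED_HEL(:) = 0
      SELECTED_COL(:) = 0
      WRITE(*,*) SELECTED_HEL(1),SELECTED_COL(1)
      END
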
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f index 82823c1c50..2a7b58687a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uux_ttxuux/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -26,6 +26,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=64) INTEGER NGRAPHS @@ -49,8 +51,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -59,26 +61,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -89,7 +88,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -99,26 +97,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,6) / 1,-1,-1, 1,-1, 1/ DATA (NHEL(I, 2),I=1,6) / 1,-1,-1, 1,-1,-1/ DATA (NHEL(I, 3),I=1,6) / 1,-1,-1, 1, 1, 1/ @@ -194,8 +191,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -204,11 +200,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=6 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=6 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -218,12 +214,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) @@ -235,7 +230,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -264,35 +260,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
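
[Editorial note] The helicity retry logic is pared down the same way in every SMATRIX1: THIS_NTRY, IGOOD and JHEL disappear, NTRY is addressed at slot 1 only, and the extra THIS_NTRY(IMIRROR).LE.10 escape clause is dropped, leaving NTRY(1).LE.MAXTRIES and ISUM_HEL.NE.0 as the only ways into the probing loop. A condensed, compilable sketch of the surviving condition (MAXTRIES is a placeholder for the genps.inc value):

      PROGRAM RETRY_SKETCH
C     The surviving helicity-probe condition; MAXTRIES is a
C     placeholder (genps.inc in the real code).
      IMPLICIT NONE
      INTEGER NCOMB,MAXTRIES,I,ISUM_HEL,NTRY1
      PARAMETER (NCOMB=64,MAXTRIES=25)
      LOGICAL GOODHEL(NCOMB)
      GOODHEL(:) = .FALSE.
      ISUM_HEL = 0
      NTRY1 = 1
      DO I=1,NCOMB
C       Probe a helicity if it is known good, if we are still in
C       the initial MAXTRIES scan, or if summing explicitly; the
C       old THIS_NTRY(IMIRROR).LE.10 escape clause is gone.
        IF (GOODHEL(I).OR.NTRY1.LE.MAXTRIES.OR.ISUM_HEL.NE.0) THEN
          CONTINUE ! here the real code evaluates MATRIX1
        ENDIF
      ENDDO
      END
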
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -364,7 +358,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc index ff59b31ca9..7ea668d895 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.h index 61d801feb4..f8e2c83991 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig.f index 2dabdef85f..457e737d33 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1139,11 +1139,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1153,32 +1154,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f index 96afa77779..80d690354a 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -106,6 +106,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -241,7 +243,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -312,7 +314,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! 
random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -351,9 +353,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -371,6 +374,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -556,11 +561,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -660,9 +660,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -823,3 +825,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/driver.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/driver.f index 526cc3b0ae..f7f23196eb 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/driver.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
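
[Editorial note] One last pattern worth flagging: in both matrix1.f and driver.f the new INCLUDE '../../Source/vector.inc' is placed ahead of INCLUDE 'run.inc', presumably because run.inc dimensions per-event arrays with VECSIZE_MEMMAX, which vector.inc defines. A self-contained miniature of that ordering constraint; the run.inc stand-in below is illustrative, not its real contents:

      PROGRAM INCLUDE_ORDER_SKETCH
      IMPLICIT NONE
C     Inlined stand-ins for the two includes: vector.inc must come
C     first because run.inc dimensions arrays with VECSIZE_MEMMAX.
C     --- what vector.inc provides (sketch) ---
      INTEGER VECSIZE_MEMMAX
      PARAMETER (VECSIZE_MEMMAX=32)
C     --- a run.inc-like declaration (real contents differ) ---
      DOUBLE PRECISION SCALEFACT(VECSIZE_MEMMAX)
      LOGICAL FIXED_REN_SCALE
      COMMON/SKETCH_RUN/SCALEFACT,FIXED_REN_SCALE
C     Swapping the two blocks above would leave VECSIZE_MEMMAX
C     undefined at the point where SCALEFACT is dimensioned.
      SCALEFACT(:) = 1D0
      FIXED_REN_SCALE = .TRUE.
      WRITE(*,*) 'scalefact(1) =', SCALEFACT(1)
      END
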
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f index 8ba3eb160c..d270065dca 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxcx_ttxuxcx/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -28,6 +28,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=64) INTEGER NGRAPHS @@ -51,8 +53,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -61,26 +63,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -91,7 +90,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -101,26 +99,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,6) /-1,-1,-1, 1, 1, 1/ DATA (NHEL(I, 2),I=1,6) /-1,-1,-1, 1, 1,-1/ DATA (NHEL(I, 3),I=1,6) /-1,-1,-1, 1,-1, 1/ @@ -196,8 +193,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -206,11 +202,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=6 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=6 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -220,12 +216,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) @@ -237,7 +232,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -266,35 +262,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
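
For reference, the filtering logic that these hunks simplify can be summarised in a few lines: during the first MAXTRIES calls, every one of the NCOMB helicity combinations is evaluated, and a combination is promoted to "good" once its absolute contribution exceeds the LIMHEL fraction of the average term. A hedged Python sketch (the names mirror the Fortran variables, but this is not the generated code):

    def filter_helicities(matrix1, p, ncomb, goodhel, ntry, maxtries, limhel):
        """One SMATRIX1-style pass: evaluate the allowed combinations,
        then flag any combination above the LIMHEL threshold."""
        ts = [0.0] * ncomb
        ans = 0.0
        for i in range(ncomb):
            if goodhel[i] or ntry <= maxtries:
                ts[i] = matrix1(p, i)      # one MATRIX1 call per combination
                ans += ts[i]
        if ntry <= maxtries:
            for i in range(ncomb):
                if not goodhel[i] and abs(ts[i]) > ans * limhel / ncomb:
                    goodhel[i] = True      # found a contributing helicity
        return ans, ts
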
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -366,7 +360,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc index b6d60ab8be..11b75db19c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.h b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.h index b4cb750c38..8866ee1700 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.h +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig.f index dc961917f2..dc4100f8ae 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1137,11 +1137,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1151,32 +1152,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f index f1b990cb16..c97397e1e2 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -104,6 +104,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -235,7 +237,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -304,7 +306,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! 
random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -343,9 +345,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -363,6 +366,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -540,11 +545,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -644,9 +644,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -807,3 +809,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/driver.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/driver.f index 526cc3b0ae..f7f23196eb 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/driver.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
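
The driver.f change is the same in every P* directory: when init_mode is active (the "Determining zero helicities" pass), the run is forced to fixed renormalization and factorization scales and ickkw=0, plausibly so that the helicity scan is not affected by event-dependent scale choices. A sketch of the toggle, with cfg as a hypothetical dict standing in for the run.inc common blocks:

    def enter_zero_helicity_mode(cfg):
        # cfg is a plain dict here; the Fortran sets the run.inc variables.
        cfg['fixed_ren_scale'] = True     # freeze mu_R
        cfg['fixed_fac_scale1'] = True    # freeze mu_F for beam 1
        cfg['fixed_fac_scale2'] = True    # freeze mu_F for beam 2
        cfg['ickkw'] = 0                  # no CKKW scale reshuffling
        print('Determining zero helicities')
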
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f index 82735f7c1e..93a9de8bf3 100644 --- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f +++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/P2_uxux_ttxuxux/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -26,6 +26,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=64) INTEGER NGRAPHS @@ -49,8 +51,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -59,26 +61,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -89,7 +88,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -99,26 +97,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,6) /-1,-1,-1, 1, 1, 1/ DATA (NHEL(I, 2),I=1,6) /-1,-1,-1, 1, 1,-1/ DATA (NHEL(I, 3),I=1,6) /-1,-1,-1, 1,-1, 1/ @@ -194,8 +191,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -204,11 +200,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=6 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=6 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -218,12 +214,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) @@ -235,7 +230,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -264,35 +260,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
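
Once the good set is frozen, SMATRIX1 draws a single combination per event from the accumulated TS(I) weights, with RHEL playing the role of the uniform random number declared above. A compact sketch of that sampling step (illustrative only; the generated Fortran restricts the draw to the good combinations via the DiscreteSampler grid):

    import bisect
    from itertools import accumulate

    def pick_helicity(ts, rhel):
        """Draw index i with probability |ts[i]| / sum(|ts|); rhel in [0,1)."""
        cum = list(accumulate(abs(t) for t in ts))
        if cum[-1] == 0.0:
            raise ValueError('no contributing helicity combination')
        return bisect.bisect_right(cum, rhel * cum[-1])   # 0-based IHEL
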
+                NGOOD = NGOOD +1
+                PRINT *,'Added good helicity ',I, 'for process 1',TS(I)
+     $           *NCOMB/ANS,' in event ',NTRY(1)
              ENDIF
            ENDDO
          ENDIF
-        IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN
-          ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR))
+        IF(NTRY(1).EQ.MAXTRIES)THEN
+          ISHEL=MIN(ISUM_HEL,NGOOD)
C     Print the number of good helicities
-          IF (NGOODHEL(IMIRROR).EQ.-1) THEN
-            NGOODHEL(IMIRROR)=0
+          IF (NGOODHEL.EQ.-1) THEN
+            NGOODHEL=0
             DO I=1,NCOMB
-              IF (GOODHEL(I,IMIRROR)) THEN
-                NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1
+              IF (GOODHEL(I,1)) THEN
+                NGOODHEL=NGOODHEL+1
               ENDIF
             END DO
-            WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror?
+            WRITE (6,*) 'NGOODHEL =', NGOODHEL
             WRITE (6,*) 'NCOMB =', NCOMB
           ENDIF
         ENDIF
@@ -364,7 +358,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL,
 
       REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC)
 C     
-C     Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23
+C     Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17
 C     By the MadGraph5_aMC@NLO Development Team
 C     Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch
 C     
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cluster.f b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cluster.f
index 649e46f4e9..b8995283ed 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cluster.f
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/cluster.f
@@ -552,6 +552,8 @@ logical function cluster(p, ivec)
       if (btest(mlevel,1))
     $     write (*,*)'New event'
 
+      iwin = 0
+      jwin = 0
       cluster=.false.
       clustered=.false.
       do i=0,3
@@ -663,7 +665,8 @@ logical function cluster(p, ivec)
 c     initialize graph storage
       igraphs(0)=0
       nleft=nexternal
-c     cluster
+c     cluster 
+      if (iwin.eq.0.or.jwin.eq.0) stop 21
       do n=1,nexternal-2
 c     combine winner
          imocl(n)=imap(iwin,2)+imap(jwin,2)
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/proc_characteristics b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/proc_characteristics
index 8bc9226ddb..2805dfbacf 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/proc_characteristics
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/proc_characteristics
@@ -17,6 +17,8 @@
     splitting_types = []
     perturbation_order = []
     limitations = []
+    ew_sudakov = False
     hel_recycling = False
     single_color = False
     nlo_mixed_expansion = True
+    gauge = unitary
diff --git a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/refine.sh b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/refine.sh
index afb9b99ad1..b46170ba23 100644
--- a/epochX/cudacpp/pp_tt012j.mad/SubProcesses/refine.sh
+++ b/epochX/cudacpp/pp_tt012j.mad/SubProcesses/refine.sh
@@ -57,7 +57,11 @@ j=%(directory)s
 	for((try=1;try<=16;try+=1));
 	do
 	    if [ "$keeplog" = true ] ; then
+		if [[ -e ../madevent ]];then
 		../madevent 2>&1 >> $k <input_app.txt
+		else
+		../madevent_fortran 2>&1 >> $k <input_app.txt
+		fi
 	    else
 		../madevent 2>&1 >> log.txt <input_app.txt
 	    fi
diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/banner.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/banner.py
--- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/banner.py
+++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/banner.py
-        pat_begin=re.compile('<(?P<name>\w*)>')
-        pat_end=re.compile('</(?P<name>\w*)>')
+        pat_begin=re.compile(r'<(?P<name>\w*)>')
+        pat_end=re.compile(r'</(?P<name>\w*)>')
 
         tag_to_file={'slha':'param_card.dat',
                      'mgruncard':'run_card.dat',
@@ -319,7 +319,7 @@ def check_pid(self, pid2label):
 
     def get_lha_strategy(self):
         """get the lha_strategy: how the weight have to be handle by the shower"""
 
-        if not self["init"]:
+        if "init" not in self or not self["init"]:
             raise Exception("No init block define")
 
         data = self["init"].split('\n')[0].split()
@@ -537,7 +537,8 @@ def charge_card(self, tag):
             self.param_card = param_card_reader.ParamCard(param_card)
             return self.param_card
         elif tag == 'mgruncard':
-            self.run_card = RunCard(self[tag], unknown_warning=False)
+            with misc.TMP_variable(RunCard, 'allow_scan', True):
+                self.run_card = RunCard(self[tag], consistency=False, unknow_warning=False)
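
The charge_card change above leans on misc.TMP_variable to switch RunCard.allow_scan on only while the banner's run card is being parsed. Conceptually, that helper is a context manager that sets a class attribute and restores it afterwards; a minimal sketch (not the MG5aMC implementation):

    from contextlib import contextmanager

    @contextmanager
    def tmp_attribute(cls, name, value):
        """Temporarily set cls.name, restoring the old value even on error."""
        old = getattr(cls, name)
        setattr(cls, name, value)
        try:
            yield cls
        finally:
            setattr(cls, name, old)

    # usage, mirroring the hunk above:
    # with tmp_attribute(RunCard, 'allow_scan', True):
    #     run_card = RunCard(banner_text, consistency=False)
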
return self.run_card elif tag == 'mg5proccard': proc_card = self[tag].split('\n') @@ -976,6 +977,8 @@ class ConfigFile(dict): """ a class for storing/dealing with input file. """ + allow_scan = False + def __init__(self, finput=None, **opt): """initialize a new instance. input can be an instance of MadLoopParam, a file, a path to a file, or simply Nothing""" @@ -993,6 +996,7 @@ def __init__(self, finput=None, **opt): # Initialize it with all the default value self.user_set = set() self.auto_set = set() + self.scan_set = {} #key -> type of list (int/float/bool/str/... for scan self.system_only = set() self.lower_to_case = {} self.list_parameter = {} #key -> type of list (int/float/bool/str/... @@ -1109,6 +1113,8 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): #1. check if the parameter is set to auto -> pass it to special if lower_name in self: targettype = type(dict.__getitem__(self, lower_name)) + if lower_name in self.scan_set: + targettype = self.scan_set[lower_name] if targettype != str and isinstance(value, str) and value.lower() == 'auto': self.auto_set.add(lower_name) if lower_name in self.user_set: @@ -1118,13 +1124,26 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): return elif lower_name in self.auto_set: self.auto_set.remove(lower_name) - + + + #1. check if the parameter is set to auto -> pass it to special + scan_targettype = None + if self.allow_scan and isinstance(value, str) and value.strip().startswith('scan'): + if lower_name in self.user_set: + self.user_set.remove(lower_name) + self.scan_set[lower_name] = type(self[lower_name]) + dict.__setitem__(self, lower_name, value) + #keep old value. + self.post_set(lower_name,value, change_userdefine, raiseerror) + return + elif lower_name in self.scan_set: + scan_targettype = self.scan_set[lower_name] + del self.scan_set[lower_name] + # 2. Find the type of the attribute that we want if lower_name in self.list_parameter: targettype = self.list_parameter[lower_name] - - if isinstance(value, str): # split for each comma/space value = value.strip() @@ -1248,8 +1267,11 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): if change_userdefine: self.user_set.add(lower_name) return self.post_set(lower_name, None, change_userdefine, raiseerror) - elif name in self: - targettype = type(self[name]) + elif name in self: + if scan_targettype: + targettype = targettype + else: + targettype = type(self[name]) else: logger.debug('Trying to add argument %s in %s. ' % (name, self.__class__.__name__) +\ 'This argument is not defined by default. 
Please consider adding it.') @@ -1262,7 +1284,7 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): if change_userdefine: self.user_set.add(lower_name) return self.post_set(lower_name, None, change_userdefine, raiseerror) - + value = self.format_variable(value, targettype, name=name) #check that the value is allowed: if lower_name in self.allowed_value and '*' not in self.allowed_value[lower_name]: @@ -1444,7 +1466,7 @@ def format_variable(value, targettype, name="unknown"): value =int(value[:-1]) * convert[value[-1]] elif '/' in value or '*' in value: try: - split = re.split('(\*|/)',value) + split = re.split(r'(\*|/)',value) v = float(split[0]) for i in range((len(split)//2)): if split[2*i+1] == '*': @@ -1482,7 +1504,7 @@ def format_variable(value, targettype, name="unknown"): value = float(value) except ValueError: try: - split = re.split('(\*|/)',value) + split = re.split(r'(\*|/)',value) v = float(split[0]) for i in range((len(split)//2)): if split[2*i+1] == '*': @@ -1491,7 +1513,10 @@ def format_variable(value, targettype, name="unknown"): v /= float(split[2*i+2]) except: v=0 - raise InvalidCmd("%s can not be mapped to a float" % value) + if "scan" in value: + raise InvalidCmd("%s is not supported here. Note that scan command can not be present simultaneously in the run_card and param_card." % value) + else: + raise InvalidCmd("%s can not be mapped to a float" % value) finally: value = v else: @@ -1737,10 +1762,12 @@ def default_setup(self): self.add_param('splitting_types',[], typelist=str) self.add_param('perturbation_order', [], typelist=str) self.add_param('limitations', [], typelist=str) + self.add_param('ew_sudakov', False) self.add_param('hel_recycling', False) self.add_param('single_color', True) self.add_param('nlo_mixed_expansion', True) - + self.add_param('gauge', 'U') + def read(self, finput): """Read the input file, this can be a path to a file, a file object, a str with the content of the file.""" @@ -3104,7 +3131,7 @@ def write(self, output_file, template=None, python_template=False, # do not write hidden parameter not hidden for this template # if python_template: - written = written.union(set(re.findall('\%\((\w*)\)s', open(template,'r').read(), re.M))) + written = written.union(set(re.findall(r'\%\((\w*)\)s', open(template,'r').read(), re.M))) to_write = to_write.union(set(self.hidden_param)) to_write = to_write.difference(written) @@ -3157,7 +3184,6 @@ def get_value_from_include(self, path, list_of_params, output_dir): if path does not exists return the current value in self for all parameter""" #WARNING DOES NOT HANDLE LIST/DICT so far - misc.sprint(output_dir, path) # handle case where file is missing if not os.path.exists(pjoin(output_dir,path)): misc.sprint("include file not existing", pjoin(output_dir,path)) @@ -3259,7 +3285,7 @@ def edit_dummy_fct_from_file(self, filelist, outdir): text = open(path,'r').read() #misc.sprint(text) f77_type = ['real*8', 'integer', 'double precision', 'logical'] - pattern = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ + pattern = re.compile(r'^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ % {'type':'|'.join(f77_type)}, re.I+re.M) for fct in pattern.findall(text): fsock = file_writers.FortranWriter(tmp,'w') @@ -3287,6 +3313,7 @@ def edit_dummy_fct_from_file(self, filelist, outdir): starttext = open(pjoin(outdir, path+'.orig')).read() fsock.remove_routine(starttext, to_mod[path][0]) for text in to_mod[path][1]: + text = self.retro_compatible_custom_fct(text) 
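
The re.split(r'(\*|/)', value) idiom used by format_variable (shown twice above) folds a product/ratio string left to right. A self-contained sketch with a worked example:

    import re

    def eval_ratio(value):
        """Evaluate strings like '2*3.5/7' the way format_variable does."""
        split = re.split(r'(\*|/)', value)   # ['2', '*', '3.5', '/', '7']
        v = float(split[0])
        for i in range(len(split) // 2):
            if split[2 * i + 1] == '*':
                v *= float(split[2 * i + 2])
            else:
                v /= float(split[2 * i + 2])
        return v

    assert eval_ratio('2*3.5/7') == 1.0
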
                    fsock.writelines(text)
                fsock.close()
                if not filecmp.cmp(pjoin(outdir, path), pjoin(outdir, path+'.tmp')):
@@ -3303,7 +3330,33 @@ def edit_dummy_fct_from_file(self, filelist, outdir):
                 files.mv(pjoin(outdir,path+'.orig'), pjoin(outdir, path))
 
+    @staticmethod
+    def retro_compatible_custom_fct(lines, mode=None):
+        f77_type = ['real*8', 'integer', 'double precision', 'logical']
+        function_pat = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \
+                                  % {'type':'|'.join(f77_type)}, re.I+re.M)
+        include_pat = re.compile(r"\s+include\s+[\'\"]([\w\./]*)")
+
+        assert isinstance(lines, list)
+        sol = []
+
+        if mode is None or 'vector.inc' in mode:
+            search = True
+            for i,line in enumerate(lines[:]):
+                if search and re.search(include_pat, line):
+                    name = re.findall(include_pat, line)[0]
+                    misc.sprint('DETECTED INCLUDE', name)
+                    if 'vector.inc' in name:
+                        search = False
+                    if 'run.inc' in name:
+                        sol.append("      include 'vector.inc'")
+                        search = False
+                sol.append(line)
+                if re.search(function_pat, line):
+                    misc.sprint("DETECTED FCT")
+                    search = True
+        return sol
 
     def guess_entry_fromname(self, name, value):
         """
@@ -3346,7 +3399,7 @@ def update_typelist(value, name, opts):
         #handle metadata
         opts = {}
         forced_opts = []
-        for key,val in re.findall("\<(?P<name>[_\-\w]+)\=(?P<value>[^>]*)\>", str(name)):
+        for key,val in re.findall(r"\<(?P<name>[_\-\w]+)\=(?P<value>[^>]*)\>", str(name)):
             forced_opts.append(key)
             if val in ['True', 'False']:
                 opts[key] = eval(val)
@@ -3681,11 +3734,22 @@ def write_autodef(self, output_dir, output_file=None):
             out = ["%s\n" %l for l in out]
             fsock.writelines(out)
 
-    @staticmethod
-    def get_idbmup(lpp):
+    def get_idbmup(self, lpp, beam=1):
         """return the particle colliding pdg code"""
         if lpp in (1,2, -1,-2):
-            return math.copysign(2212, lpp)
+            target = 2212
+            if 'nb_proton1' in self:
+                nbp = self['nb_proton%s' % beam]
+                nbn = self['nb_neutron%s' % beam]
+                if nbp == 1 and nbn ==0:
+                    target = 2212
+                elif nbp==0 and nbn ==1:
+                    target = 2112
+                else:
+                    target = 1000000000
+                    target += 10 * (nbp+nbn)
+                    target += 10000 * nbp
+            return math.copysign(target, lpp)
         elif lpp in (3,-3):
             return math.copysign(11, lpp)
         elif lpp in (4,-4):
@@ -3701,8 +3765,8 @@ def get_banner_init_information(self):
         the first line of the <init> block of the lhe file."""
 
         output = {}
-        output["idbmup1"] = self.get_idbmup(self['lpp1'])
-        output["idbmup2"] = self.get_idbmup(self['lpp2'])
+        output["idbmup1"] = self.get_idbmup(self['lpp1'], beam=1)
+        output["idbmup2"] = self.get_idbmup(self['lpp2'], beam=2)
         output["ebmup1"] = self["ebeam1"]
         output["ebmup2"] = self["ebeam2"]
         output["pdfgup1"] = 0
@@ -3959,7 +4023,8 @@ def check_validity(self, card):
             dict.__setitem__(card, 'pdlabel1', card['pdlabel'])
             dict.__setitem__(card, 'pdlabel2', card['pdlabel'])
 
-        if abs(card['lpp1']) == 1 == abs(card['lpp2']) and card['pdlabel1'] != card['pdlabel2']:
+        if isinstance(card['lpp1'],int) and isinstance(card['lpp2'],int) and \
+           abs(card['lpp1']) == 1 == abs(card['lpp2']) and card['pdlabel1'] != card['pdlabel2']:
             raise InvalidRunCard("Assymetric beam pdf not supported for proton-proton collision")
 
     def status(self, card):
@@ -4156,12 +4221,16 @@ def default_setup(self):
         self.add_param('frame_id', 6, system=True)
         self.add_param("event_norm", "average", allowed=['sum','average', 'unity'], include=False, sys_default='sum', hidden=True)
+        self.add_param("keep_log", "normal", include=False, hidden=True,
+                       comment="none: all log send to /dev/null.\n minimal: keep only log for survey of the last run.\n normal: keep only log for survey of all run. 
\n debug: keep all log (survey and refine)", + allowed=['none', 'minimal', 'normal', 'debug']) #cut self.add_param("auto_ptj_mjj", True, hidden=True) self.add_param("bwcutoff", 15.0) self.add_param("cut_decays", False, cut='d') self.add_param('dsqrt_shat',0., cut=True) self.add_param("nhel", 0, include=False) + self.add_param("limhel", 1e-8, hidden=True, comment="threshold to determine if an helicity contributes when not MC over helicity.") #pt cut self.add_param("ptj", 20.0, cut='j') self.add_param("ptb", 0.0, cut='b') @@ -4842,7 +4911,7 @@ def create_default_for_process(self, proc_characteristic, history, proc_def): # here pick strategy 2 if only one QCD color flow # and for pure multi-jet case jet_id = [21] + list(range(1, self['maxjetflavor']+1)) - if proc_characteristic['single_color']: + if proc_characteristic['gauge'] != 'FD' and proc_characteristic['single_color']: self['sde_strategy'] = 2 #for pure lepton final state go back to sde_strategy=1 pure_lepton=True @@ -5741,9 +5810,10 @@ def check_validity(self): # check that ebeam is bigger than the proton mass. for i in [1,2]: - if self['lpp%s' % i ] not in [1,2]: + # do not for proton mass if not proton PDF (or when scan initialization) + if self['lpp%s' % i ] not in [1,2] or isinstance(self['ebeam%i' % i], str): continue - + if self['ebeam%i' % i] < 0.938: if self['ebeam%i' %i] == 0: logger.warning("At-rest proton mode set: energy beam set to 0.938 GeV") @@ -6193,3 +6263,181 @@ def log_and_update(self, banner, card, par, v): xcard = banner.charge_card(card) xcard[par[0]].param_dict[(par[1],)].value = v xcard.write(os.path.join(self.me_dir, 'Cards', '%s.dat' % card)) + + + + +class RunCardIterator(object): + """A class keeping track of the scan: flag in the param_card and + having an __iter__() function to scan over all the points of the scan. + """ + + logging = True + def __init__(self, input_path=None): + with misc.TMP_variable(RunCard, 'allow_scan', True): + self.run_card = RunCard(input_path, consistency=False) + self.run_card.allow_scan = True + + self.itertag = [] #all the current value use + self.cross = [] # keep track of all the cross-section computed + self.param_order = [] + + def __iter__(self): + """generate the next param_card (in a abstract way) related to the scan. + Technically this generates only the generator.""" + + if hasattr(self, 'iterator'): + return self.iterator + self.iterator = self.iterate() + return self.iterator + + def write(self, path): + self.__iter__.write(path) + + def next(self, autostart=False): + """call the next iteration value""" + try: + iterator = self.iterator + except: + if autostart: + iterator = self.__iter__() + else: + raise + try: + out = next(iterator) + except StopIteration: + del self.iterator + raise + return out + + def iterate(self): + """create the actual generator""" + all_iterators = {} # dictionary of key -> block of object to scan [([param, [values]), ...] 
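
For context, the allow_scan/scan_set machinery introduced in ConfigFile.__setitem__ earlier in this diff boils down to this: a value starting with 'scan' is stored verbatim (remembering the type it will eventually take) instead of being cast, so that the iterator below can expand it later. A hypothetical minimal version:

    class ScanAwareConfig(dict):
        """Sketch of the ConfigFile scan deferral; not the MG5aMC class."""
        allow_scan = True

        def __init__(self):
            super().__init__()
            self.scan_set = {}    # name -> type to cast to after expansion

        def set(self, name, value):
            if self.allow_scan and isinstance(value, str) \
                    and value.strip().startswith('scan'):
                self.scan_set[name] = type(self.get(name, 0.0))
                dict.__setitem__(self, name, value)   # keep the raw string
                return
            self.scan_set.pop(name, None)             # leaving scan mode
            dict.__setitem__(self, name, value)
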
+        pattern = re.compile(r'''scan\s*(?P<id>\d*)\s*:\s*(?P<value>[^#]*)''', re.I)
+
+        # fill all_iterators with the run_card information
+        for name in self.run_card.scan_set:
+            value = self.run_card[name]
+            try:
+                key, def_list = pattern.findall(value)[0]
+            except Exception as error:
+                misc.sprint(error)
+                raise Exception("Fail to handle scanning tag in run_card: Please check that the syntax is valid")
+            if key == '':
+                key = -1 * len(all_iterators)
+            if key not in all_iterators:
+                all_iterators[key] = []
+            try:
+                all_iterators[key].append( (name, eval(def_list)))
+            except SyntaxError as error:
+                raise Exception("Fail to handle your scan definition. Please check your syntax:\n entry: %s \n Error reported: %s" %(def_list, error))
+
+        #prepare to keep track of parameter changing for the report
+        keys = list(all_iterators.keys()) # need to fix an order for the scan
+        #store the type of parameter
+        for key in keys:
+            for param, values in all_iterators[key]:
+                self.param_order.append("run_card#%s" % (param))
+
+        # do the loop
+        lengths = [list(range(len(all_iterators[key][0][1]))) for key in keys]
+        from functools import reduce
+        total = reduce((lambda x, y: x * y),[len(x) for x in lengths])
+        for i,positions in enumerate(itertools.product(*lengths)):
+            self.itertag = []
+            if self.logging:
+                logger.info("Create the next run_card in the scan definition (%s/%s) " %( i+1, total), '$MG:BOLD')
+            for i, pos in enumerate(positions):
+                key = keys[i]
+                for param, values in all_iterators[key]:
+                    # assign the value in the card.
+                    self.run_card[param] = values[pos]
+                    self.itertag.append(values[pos])
+                    if self.logging:
+                        logger.info("change parameter %s to %s", \
+                                    param, values[pos])
+
+
+            # retrun the current param_card up to next iteration
+            yield self.run_card
+
+
+    def store_entry(self, run_name, cross, error=None, run_card_path=None):
+        """store the value of the cross-section"""
+
+        if isinstance(cross, dict):
+            info = dict(cross)
+            info.update({'bench' : self.itertag, 'run_name': run_name})
+            self.cross.append(info)
+        else:
+            if error is None:
+                self.cross.append({'bench' : self.itertag, 'run_name': run_name, 'cross(pb)':cross})
+            else:
+                self.cross.append({'bench' : self.itertag, 'run_name': run_name, 'cross(pb)':cross, 'error(pb)':error})
+
+
+    def write_summary(self, path, order=None, lastline=False, nbcol=20):
+        """ """
+
+        if path:
+            ff = open(path, 'w')
+            path_events = path.rsplit("/", 1)[0]
+            #identCard = open(pjoin(path.rsplit("/", 2)[0], "Cards", "ident_card.dat"))
+            #identLines = identCard.readlines()
+            #identCard.close()
+        else:
+            ff = StringIO.StringIO()
+        if order:
+            keys = order
+        else:
+            keys = list(self.cross[0].keys())
+            if 'bench' in keys: keys.remove('bench')
+            if 'run_name' in keys: keys.remove('run_name')
+            keys.sort()
+            if 'cross(pb)' in keys:
+                keys.remove('cross(pb)')
+                keys.append('cross(pb)')
+            if 'error(pb)' in keys:
+                keys.remove('error(pb)')
+                keys.append('error(pb)')
+
+        formatting = "#%s%s%s\n" %('%%-%is ' % (nbcol-1), ('%%-%is ' % (nbcol))* len(self.param_order),
+                                   ('%%-%is ' % (nbcol))* len(keys))
+        # header
+        if not lastline:
+            ff.write(formatting % tuple(['run_name'] + self.param_order + keys))
+        formatting = "%s%s%s\n" %('%%-%is ' % (nbcol), ('%%-%ie ' % (nbcol))* len(self.param_order),
+                                  ('%%-%ie ' % (nbcol))* len(keys))
+
+        if not lastline:
+            to_print = self.cross
+        else:
+            to_print = self.cross[-1:]
+        for info in to_print:
+            name = info['run_name']
+            bench = info['bench']
+            data = []
+            for k in keys:
+                if k in info:
+                    data.append(info[k])
+                else:
+                    data.append(0.)
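
The iterate() generator above groups scan entries by the integer tag after the scan keyword (entries sharing a tag advance in lockstep) and then walks the Cartesian product of the groups. A standalone sketch of that expansion with two hypothetical run-card values — ebeam1/ebeam2 tied together by scan1, ptj on its own axis — yielding 2 x 3 = 6 run cards:

    import itertools
    import re

    card = {'ebeam1': 'scan1:[3500, 6500]',
            'ebeam2': 'scan1:[3500, 6500]',
            'ptj':    'scan:[10., 20., 30.]'}

    pattern = re.compile(r'scan\s*(?P<id>\d*)\s*:\s*(?P<value>[^#]*)', re.I)

    groups = {}
    for name, value in card.items():
        key, def_list = pattern.findall(value)[0]
        groups.setdefault(key or 'untagged_%d' % len(groups), []).append(
            (name, eval(def_list)))      # eval of the list, as in iterate()

    keys = list(groups)
    axes = [range(len(groups[k][0][1])) for k in keys]
    for positions in itertools.product(*axes):
        point = {name: values[pos]
                 for k, pos in zip(keys, positions)
                 for name, values in groups[k]}
        print(point)                     # one run card per line, six in total
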
+ ff.write(formatting % tuple([name] + bench + data)) + ff_single = open(pjoin(path_events, name, "params.dat"), "w") + for i_bench in range(0, len(bench)): + ff_single.write(self.param_order[i_bench] + " = " + str(bench[i_bench]) +"\n") + ff_single.close() + + if not path: + return ff.getvalue() + + + def get_next_name(self, run_name): + """returns a smart name for the next run""" + + if '_' in run_name: + name, value = run_name.rsplit('_',1) + if value.isdigit(): + return '%s_%02i' % (name, float(value)+1) + # no valid '_' in the name + return '%s_scan_02' % run_name diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/check_param_card.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/check_param_card.py index 71089d7480..bc785b5de6 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/check_param_card.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/check_param_card.py @@ -649,7 +649,7 @@ def write_inc_file(self, outpath, identpath, default, need_mp=False): #check if we need to write the value of scale for some block if os.path.exists(input_inc): text = open(input_inc).read() - scales = list(set(re.findall('mdl__(\w*)__scale', text, re.I))) + scales = list(set(re.findall(r'mdl__(\w*)__scale', text, re.I))) else: scales = [] @@ -1000,10 +1000,12 @@ def iterate(self): self.param_order.append("%s#%s" % (param.lhablock, '_'.join(repr(i) for i in param.lhacode))) # do the loop lengths = [list(range(len(all_iterators[key][0][1]))) for key in keys] - for positions in itertools.product(*lengths): + from functools import reduce + total = reduce((lambda x, y: x * y),[len(x) for x in lengths]) + for i,positions in enumerate(itertools.product(*lengths)): self.itertag = [] if self.logging: - logger.info("Create the next param_card in the scan definition", '$MG:BOLD') + logger.info("Create the next param_card in the scan definition (%s/%s)" % (i+1,total), '$MG:BOLD') for i, pos in enumerate(positions): key = keys[i] for param, values in all_iterators[key]: diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/cluster.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/cluster.py index 9a893f630d..1ad860e04f 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/cluster.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/cluster.py @@ -646,7 +646,10 @@ def worker(self): if os.path.exists(exe) and not exe.startswith('/'): exe = './' + exe if isinstance(opt['stdout'],str): - opt['stdout'] = open(opt['stdout'],'w') + if opt['stdout'] == '/dev/null': + opt['stdout'] = os.open(os.devnull, os.O_RDWR) + else: + opt['stdout'] = open(opt['stdout'],'w') if opt['stderr'] == None: opt['stderr'] = subprocess.STDOUT if arg: @@ -671,11 +674,12 @@ def worker(self): self.pids.put(pid) # the function should return 0 if everything is fine # the error message otherwise - returncode = exe(*arg, **opt) - if returncode != 0: - logger.warning("fct %s does not return 0. Stopping the code in a clean way. The error was:\n%s", exe, returncode) + try: + returncode = exe(*arg, **opt) + except Exception as error: + #logger.warning("fct %s does not return 0. Stopping the code in a clean way. 
The error was:\n%s", exe, returncode) self.stoprequest.set() - self.remove("fct %s does not return 0:\n %s" % (exe, returncode)) + self.remove("fct %s does raise %s\n %s" % (exe, error)) except Exception as error: self.fail_msg = sys.exc_info() logger.warning(str(error)) @@ -700,7 +704,7 @@ def worker(self): def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, - log=None, required_output=[], nb_submit=0): + log=None, required_output=[], nb_submit=0, python_opts={}): """submit a job on multicore machine""" # open threads if needed @@ -720,7 +724,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, return tag else: # python function - self.queue.put((tag, prog, argument, {})) + self.queue.put((tag, prog, argument, python_opts)) self.submitted.put(1) return tag @@ -908,6 +912,10 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None else: requirement = '' + if 'cluster_walltime' in self.options and self.options['cluster_walltime']\ + and self.options['cluster_walltime'] != 'None': + requirement+='\n MaxRuntime = %s' % self.options['cluster_walltime'] + if cwd is None: cwd = os.getcwd() if stdout is None: @@ -936,7 +944,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None #Submitting job(s). #Logging submit event(s). #1 job(s) submitted to cluster 2253622. - pat = re.compile("submitted to cluster (\d*)",re.MULTILINE) + pat = re.compile(r"submitted to cluster (\d*)",re.MULTILINE) output = output.decode(errors='ignore') try: id = pat.search(output).groups()[0] @@ -1025,7 +1033,7 @@ def submit2(self, prog, argument=[], cwd=None, stdout=None, stderr=None, #Logging submit event(s). #1 job(s) submitted to cluster 2253622. output = output.decode(errors='ignore') - pat = re.compile("submitted to cluster (\d*)",re.MULTILINE) + pat = re.compile(r"submitted to cluster (\d*)",re.MULTILINE) try: id = pat.search(output).groups()[0] except: @@ -1588,7 +1596,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None output = a.communicate()[0].decode(errors='ignore') #Your job 874511 ("test.sh") has been submitted - pat = re.compile("Your job (\d*) \(",re.MULTILINE) + pat = re.compile(r"Your job (\d*) \(",re.MULTILINE) try: id = pat.search(output).groups()[0] except: @@ -1606,7 +1614,7 @@ def control_one_job(self, id): if not status: return 'F' #874516 0.00000 test.sh alwall qw 03/04/2012 22:30:35 1 - pat = re.compile("^(\d+)\s+[\d\.]+\s+[\w\d\.]+\s+[\w\d\.]+\s+(\w+)\s") + pat = re.compile(r"^(\d+)\s+[\d\.]+\s+[\w\d\.]+\s+[\w\d\.]+\s+(\w+)\s") stat = '' for line in status.stdout.read().decode(errors='ignore').split('\n'): if not line: @@ -1636,7 +1644,7 @@ def control(self, me_dir=None): cmd = 'qstat -s %s' % statusflag status = misc.Popen([cmd], shell=True, stdout=subprocess.PIPE) #874516 0.00000 test.sh alwall qw 03/04/2012 22:30:35 1 - pat = re.compile("^(\d+)") + pat = re.compile(r"^(\d+)") for line in status.stdout.read().decode(errors='ignore').split('\n'): line = line.strip() try: @@ -1715,6 +1723,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None stderr = stdout if log is None: log = '/dev/null' + command = ['sbatch', '-o', stdout, '-J', me_dir, @@ -1726,6 +1735,12 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None command.insert(1, '-p') command.insert(2, self.cluster_queue) + if 'cluster_walltime' in self.options and self.options['cluster_walltime']\ + and self.options['cluster_walltime'] != 'None': + 
command.insert(1, '-t') + command.insert(2, self.options['cluster_walltime']) + + a = misc.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, @@ -1736,7 +1751,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None id = output_arr[3].rstrip() if not id.isdigit(): - id = re.findall('Submitted batch job ([\d\.]+)', ' '.join(output_arr)) + id = re.findall(r'Submitted batch job ([\d\.]+)', ' '.join(output_arr)) if not id or len(id)>1: raise ClusterManagmentError( 'fail to submit to the cluster: \n%s' \ diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/combine_runs.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/combine_runs.py index 4de6b84ec0..b1e8c88eac 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/combine_runs.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/combine_runs.py @@ -20,6 +20,7 @@ from __future__ import absolute_import import math import os +import shutil import re import logging from six.moves import range @@ -117,6 +118,7 @@ def sum_multichannel(self, channel): #Now read in all of the events and write them #back out with the appropriate scaled weight + to_clean = [] fsock = open(pjoin(channel, 'events.lhe'), 'w') wgt = results.axsec / results.nunwgt tot_nevents, nb_file = 0, 0 @@ -129,8 +131,14 @@ def sum_multichannel(self, channel): nw = self.copy_events(fsock, pjoin(path,'events.lhe'), wgt) tot_nevents += nw nb_file += 1 + to_clean.append(path) logger.debug("Combined %s file generating %s events for %s " , nb_file, tot_nevents, channel) - + for path in to_clean: + try: + shutil.rmtree(path) + except Exception as error: + pass + @staticmethod def get_fortran_str(nb): data = '%E' % nb @@ -162,6 +170,7 @@ def copy_events(self, fsock, input, new_wgt): fsock.write(line) old_line = line return nb_evt + def get_channels(self, proc_path): """Opens file symfact.dat to determine all channels""" sympath = os.path.join(proc_path, 'symfact.dat') diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/common_run_interface.py index 9bd9d9cb50..194f0cdfbd 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/common_run_interface.py @@ -23,6 +23,7 @@ import ast import logging import math +import copy import os import re import shutil @@ -181,6 +182,23 @@ def help_add_time_of_flight(self): logger.info(' threshold option allows to change the minimal value required to') logger.info(' a non zero value for the particle (default:1e-12s)') + def help_print_results(self): + logger.info("syntax: print_results [RUN_NAME] [OPTIONS]") + logger.info("-- print the results of the previous run on the screen") + logger.info(" If not RUN_NAME is provided, the information of all run") + logger.info(" are printed one after another.") + logger.info("") + logger.info(" supported options:") + logger.info(" ------------------") + logger.info(" --format=full|short # default is full") + logger.info(" full format contains banner/... 
") + logger.info(" while short is a simple multi-column format (nice for plotting)") + logger.info(" --path=") + logger.info(" allow to write the information to a file.") + logger.info(" --mode=w|a #default is w ") + logger.info(" when the information is printed to a file, you can choose ") + logger.info(" to either overwrite the file if already exists (w mode)") + logger.info(" to append the information at the end of the file (a mode)") class CheckValidForCmd(object): @@ -727,7 +745,7 @@ def __init__(self, me_dir, options, *args, **opts): if not self.proc_characteristics['ninitial']: # Get number of initial states nexternal = open(pjoin(self.me_dir,'Source','nexternal.inc')).read() - found = re.search("PARAMETER\s*\(NINCOMING=(\d)\)", nexternal) + found = re.search(r"PARAMETER\s*\(NINCOMING=(\d)\)", nexternal) self.ninitial = int(found.group(1)) else: self.ninitial = self.proc_characteristics['ninitial'] @@ -989,7 +1007,22 @@ def do_treatcards(self, line, amcatnlo=False): #raise Exception, "%s %s %s" % (sys.path, os.path.exists(pjoin(self.me_dir,'bin','internal', 'ufomodel')), os.listdir(pjoin(self.me_dir,'bin','internal', 'ufomodel'))) import ufomodel as ufomodel zero = ufomodel.parameters.ZERO - if self.proc_characteristics['nlo_mixed_expansion']: + no_width = [] + + if self.proc_characteristics['ew_sudakov']: + # if the sudakov approximation is used, force all particle widths to zero + # unless the complex mass scheme is used + if not self.proc_characteristics['complex_mass_scheme']: + no_width = [p for p in ufomodel.all_particles if p.width != zero] + logger.info('''Setting all particle widths to zero (needed for EW Sudakov approximation).''','$MG:BOLD') + # also, check that the model features the 'ntadpole' parameter, and set it to 1 + try: + param_card['tadpole'].get(1).value = 1. + logger.info('''Setting the value of ntadpole to 1 (needed for EW Sudakov approximation).''','$MG:BOLD') + except KeyError: + logger.warning('''The model has no 'ntadpole' parameter. 
The Sudakov approximation for EW corrections may give wrong results.''') + + elif self.proc_characteristics['nlo_mixed_expansion']: no_width = [p for p in ufomodel.all_particles if (str(p.pdg_code) in pids or str(-p.pdg_code) in pids) and p.width != zero] @@ -1168,17 +1201,17 @@ def detect_card_type(path): 'Begin Minpts', 'gridpack', 'ebeam1', - 'block\s+mw_run', + r'block\s+mw_run', 'BLOCK', 'DECAY', 'launch', 'madspin', - 'transfer_card\.dat', + r'transfer_card\.dat', 'set', 'main:numberofevents', # pythia8, '@MG5aMC skip_analysis', #MA5 --both-- - '@MG5aMC\s*inputs\s*=\s*\*\.(?:hepmc|lhe)', #MA5 --both-- - '@MG5aMC\s*reconstruction_name', # MA5 hadronique + r'@MG5aMC\s*inputs\s*=\s*\*\.(?:hepmc|lhe)', #MA5 --both-- + r'@MG5aMC\s*reconstruction_name', # MA5 hadronique '@MG5aMC', # MA5 hadronique 'run_rivet_later', # Rivet ] @@ -1237,7 +1270,7 @@ def detect_card_type(path): return 'madspin_card.dat' if 'decay' in text: # need to check if this a line like "decay w+" or "set decay" - if re.search("(^|;)\s*decay", fulltext, re.M): + if re.search(r"(^|;)\s*decay", fulltext, re.M): return 'madspin_card.dat' else: return 'reweight_card.dat' @@ -2074,6 +2107,12 @@ def check_multicore(self): # ensure that the run_card is present if not hasattr(self, 'run_card'): self.run_card = banner_mod.RunCard(pjoin(self.me_dir, 'Cards', 'run_card.dat')) + # Below reads the run_card in the LHE file rather than the Cards/run_card + import madgraph.various.lhe_parser as lhe_parser + args_path = list(args) + self.check_decay_events(args_path) + self.run_card = banner_mod.RunCard(lhe_parser.EventFile(args_path[0]).get_banner()['mgruncard']) + # we want to run this in a separate shell to avoid hard f2py crash command = [sys.executable] @@ -2085,6 +2124,12 @@ def check_multicore(self): command.append('--web') command.append('reweight') + ## TV: copy the event file as backup before starting reweighting + event_path = pjoin(self.me_dir, 'Events', self.run_name, 'events.lhe.gz') + event_path_backup = pjoin(self.me_dir, 'Events', self.run_name, 'events_orig.lhe.gz') + if os.path.exists(event_path) and not os.path.exists(event_path_backup): + shutil.copyfile(event_path, event_path_backup) + ######### START SINGLE CORE MODE ############ if self.options['nb_core']==1 or self.run_card['nevents'] < 101 or not check_multicore(self): if self.run_name: @@ -2200,7 +2245,7 @@ def check_multicore(self): cross_sections[key] = value / (nb_event+1) lhe.remove() for key in cross_sections: - if key == 'orig' or key.isdigit(): + if key == 'orig' or (key.isdigit() and not (key[0] == '2')): continue logger.info('%s : %s pb' % (key, cross_sections[key])) return @@ -2461,7 +2506,7 @@ def do_add_time_of_flight(self, line): ############################################################################ def do_print_results(self, line): - """Not in help:Print the cross-section/ number of events for a given run""" + """Print the cross-section/ number of events for a given run""" args = self.split_arg(line) options={'path':None, 'mode':'w', 'format':'full'} @@ -2942,10 +2987,15 @@ def do_rivet(self, line, postprocess=False): #2 Prepare Rivet setup environments rivet_path = self.options['rivet_path'] yoda_path = self.options['yoda_path'] + fastjet_path = subprocess.Popen([self.options['fastjet'], '--prefix'], + stdout = subprocess.PIPE).stdout.read().decode(errors='ignore').strip() + set_env = set_env + "export PATH={0}:$PATH\n".format(pjoin(rivet_path, 'bin')) set_env = set_env + "export PATH={0}:$PATH\n".format(pjoin(yoda_path, 'bin')) set_env = 
set_env + "export LD_LIBRARY_PATH={0}:{1}:$LD_LIBRARY_PATH\n".format(pjoin(rivet_path, 'lib'), pjoin(rivet_path, 'lib64')) set_env = set_env + "export LD_LIBRARY_PATH={0}:{1}:$LD_LIBRARY_PATH\n".format(pjoin(yoda_path, 'lib'), pjoin(yoda_path, 'lib64')) + set_env = set_env + "export LD_LIBRARY_PATH={0}:$LD_LIBRARY_PATH\n".format(pjoin(self.options['hepmc_path'], 'lib')) + set_env = set_env + "export LD_LIBRARY_PATH={0}:$LD_LIBRARY_PATH\n".format(pjoin(fastjet_path, 'lib')) major, minor = sys.version_info[0:2] set_env = set_env + "export PYTHONPATH={0}:{1}:$PYTHONPATH\n".format(pjoin(rivet_path, 'lib', 'python%s.%s' %(major,minor), 'site-packages'),\ pjoin(rivet_path, 'lib64', 'python%s.%s' %(major,minor), 'site-packages')) @@ -4311,8 +4361,8 @@ def complete_compute_widths(self, text, line, begidx, endidx, formatting=True): else: completion = {} completion['options'] = self.list_completion(text, - ['--path=', '--output=', '--min_br=0.\$', '--nlo', - '--precision_channel=0.\$', '--body_decay=']) + ['--path=', '--output=', r'--min_br=0.\$', '--nlo', + r'--precision_channel=0.\$', '--body_decay=']) return self.deal_multiple_categories(completion, formatting) @@ -4821,9 +4871,9 @@ class AskforEditCard(cmd.OneLinePathCompletion): (return False to repeat the question) """ - all_card_name = ['param_card', 'run_card', 'pythia_card', 'pythia8_card', + all_card_name = ['param_card', 'run_card', 'pythia_card', 'pythia8_card', 'fo_analysis_card' 'madweight_card', 'MadLoopParams', 'shower_card', 'rivet_card'] - to_init_card = ['param', 'run', 'madweight', 'madloop', + to_init_card = ['param', 'run', 'madweight', 'madloop', 'fo_analysis', 'shower', 'pythia8','delphes','madspin', 'rivet'] special_shortcut = {} special_shortcut_help = {} @@ -4853,6 +4903,7 @@ def load_default(self): self.has_PY8 = False self.has_delphes = False self.has_rivet = False + self.has_fo_card = False self.paths = {} self.update_block = [] @@ -5058,7 +5109,8 @@ def init_run(self, cards): try: - self.run_card = banner_mod.RunCard(self.paths['run'], consistency='warning') + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + self.run_card = banner_mod.RunCard(self.paths['run'], consistency='warning') except IOError: self.run_card = {} try: @@ -5248,6 +5300,15 @@ def init_delphes(self, cards): self.has_delphes = True return [] + def init_fo_analysis(self, cards): + self.has_fo_card = False + if not self.get_path('FO_analyse', cards): + return [] + self.has_fo_card = True + self.fo_card = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse']) + self.fo_card_def = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse_default']) + return list(self.fo_card.string_vars) + def set_CM_velocity(self, line): """compute sqrts from the velocity in the center of mass frame""" @@ -5508,6 +5569,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed['delphes_card'] = '' if self.has_rivet: allowed['rivet_card'] = '' + if self.has_fo_card: + allowed['fo_card'] = '' elif len(args) == 2: if args[1] == 'run_card': @@ -5532,6 +5595,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed = {'delphes_card':'default'} elif args[1] == 'rivet_card': allowed = {'rivet_card':'default'} + elif args[1] == 'fo_card': + allowed = {'fo_card':'default'} else: allowed = {'value':''} @@ -5539,6 +5604,7 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): start = 1 if args[1] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', 
'MadLoop_card','pythia8_card','delphes_card','plot_card', + 'fo_card', 'madanalysis5_parton_card','madanalysis5_hadron_card', 'rivet_card']: start = 2 @@ -5576,6 +5642,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): categories.append('delphes_card') if self.has_rivet: categories.append('rivet_card') + if self.has_fo_card: + categories.append('fo_card') possibilities['category of parameter (optional)'] = \ self.list_completion(text, categories) @@ -5630,7 +5698,13 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): if 'delphes_card' in allowed: if allowed['delphes_card'] == 'default': opts = ['default', 'atlas', 'cms'] - possibilities['Delphes Card'] = self.list_completion(text, opts) + possibilities['Delphes Card'] = self.list_completion(text, opts) + + if 'fo_card' in allowed: + opts = self.fo_card.string_vars + if allowed['fo_card'] == 'default': + opts.append('default') + possibilities['FO Card'] = self.list_completion(text, opts) if 'value' in list(allowed.keys()): opts = ['default', 'scale'] @@ -5733,21 +5807,28 @@ def do_set(self, line): if args[0] in self.special_shortcut: targettypes , cmd = self.special_shortcut[args[0]] if len(args) != len(targettypes) +1: - logger.warning('shortcut %s requires %s argument' % (args[0], len(targettypes))) - if len(args) < len(targettypes) +1: - return + if len(targettypes) == 1 and args[len(targettypes)].startswith('scan'): + args = args[:len(targettypes)] + [' '.join(args[len(targettypes):])] + targettypes = [str] else: - logger.warning('additional argument will be ignored') + logger.warning('shortcut %s requires %s argument' % (args[0], len(targettypes))) + if len(args) < len(targettypes) +1: + return + else: + logger.warning('additional argument will be ignored') values ={} for i, argtype in enumerate(targettypes): try: - values = {str(i): banner_mod.ConfigFile.format_variable(args[i+1], argtype, args[0])} + values[str(i)] = banner_mod.ConfigFile.format_variable(args[i+1], argtype, args[0]) except ValueError as e: logger.warning("Wrong argument: The entry #%s should be of type %s.", i+1, argtype) return except InvalidCmd as e: - logger.warning(str(e)) - return + if isinstance(args[i+1], str) and args[i+1].startswith('scan'): + values[str(i)] = args[i+1] + else: + logger.warning(str(e)) + return #else: # logger.warning("too many argument for this command") # return @@ -5787,7 +5868,7 @@ def do_set(self, line): if os.path.exists(pythia_path): logger.info('add line QCUT = %s in pythia_card.dat' % args[1]) p_card = open(pythia_path,'r').read() - p_card, n = re.subn('''^\s*QCUT\s*=\s*[\de\+\-\.]*\s*$''', + p_card, n = re.subn(r'''^\s*QCUT\s*=\s*[\de\+\-\.]*\s*$''', ''' QCUT = %s ''' % args[1], \ p_card, flags=(re.M+re.I)) if n==0: @@ -5801,7 +5882,7 @@ def do_set(self, line): if os.path.exists(pythia_path): logger.info('add line SHOWERKT = %s in pythia_card.dat' % args[1].upper()) p_card = open(pythia_path,'r').read() - p_card, n = re.subn('''^\s*SHOWERKT\s*=\s*[default\de\+\-\.]*\s*$''', + p_card, n = re.subn(r'''^\s*SHOWERKT\s*=\s*[default\de\+\-\.]*\s*$''', ''' SHOWERKT = %s ''' % args[1].upper(), \ p_card, flags=(re.M+re.I)) if n==0: @@ -5856,7 +5937,7 @@ def do_set(self, line): pjoin(self.me_dir,'Cards', 'delphes_card.dat')) return - if args[0] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', + if args[0] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', 'fo_card', 'delphes_card','madanalysis5_hadron_card','madanalysis5_parton_card','rivet_card']: if args[1] == 'default': 
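A recurring change across these hunks is the conversion of regular-expression literals from plain strings to raw strings (e.g. 'block\s+mw_run' becomes r'block\s+mw_run'). A minimal sketch of why, assuming current CPython behaviour, where an invalid escape sequence such as '\s' inside a plain string literal triggers a SyntaxWarning at compile time and is scheduled to become an error:

    import re

    # Plain literal: '\s' is an invalid string escape; CPython currently keeps
    # the backslash but warns at compile time, so '\\s' is the explicit spelling.
    explicit = re.compile('block\\s+mw_run')
    # Raw literal: the backslash reaches the re module untouched (the form
    # these hunks adopt).
    raw = re.compile(r'block\s+mw_run')

    assert explicit.pattern == raw.pattern   # identical pattern either way
    assert raw.search('block   mw_run')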
@@ -6176,6 +6257,22 @@ def do_set(self, line): self.setRivet(args[start], value, default=default) self.rivet_card.write(self.paths['rivet'], self.paths['rivet_default']) + elif self.has_fo_card and (card in ['', 'fo_card'])\ + and args[start].lower() in [k.lower() for k in self.fo_card.string_vars]: + + if args[start] in self.conflict and card == '': + text = 'ambiguous name (present in more than one card). Please specify which card to edit' + logger.warning(text) + return + if args[start+1] == 'default': + value = self.fo_card_default[args[start]] + default = True + else: + value = args[start+1] + default = False + self.fo_card[args[start]] = value + self.modified_card.add('fo_card') + #INVALID -------------------------------------------------------------- else: logger.warning('invalid set command %s ' % line) @@ -6222,12 +6319,13 @@ def setM(self, block, name, value): def setR(self, name, value): - if self.mother_interface.inputfile: - self.run_card.set(name, value, user=True, raiseerror=True) - else: - self.run_card.set(name, value, user=True) - new_value = self.run_card.get(name) - logger.info('modify parameter %s of the run_card.dat to %s' % (name, new_value),'$MG:BOLD') + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + if self.mother_interface.inputfile: + self.run_card.set(name, value, user=True, raiseerror=True) + else: + self.run_card.set(name, value, user=True) + new_value = self.run_card.get(name) + logger.info('modify parameter %s of the run_card.dat to %s' % (name, new_value),'$MG:BOLD') def setML(self, name, value, default=False): @@ -6314,6 +6412,7 @@ def check_card_consistency(self): proc_charac = self.mother_interface.proc_characteristics if proc_charac['grouped_matrix'] and \ + isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int) and \ abs(self.run_card['lpp1']) == 1 == abs(self.run_card['lpp2']) and \ (self.run_card['nb_proton1'] != self.run_card['nb_proton2'] or self.run_card['nb_neutron1'] != self.run_card['nb_neutron2'] or @@ -6403,41 +6502,42 @@ def check_card_consistency(self): # check that only quark/gluon/photon are in initial beam if lpp=+-1 pdg_in_p = list(range(-6,7))+[21,22] - if (abs(self.run_card['lpp1'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial1'])) \ + if isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int): + if(abs(self.run_card['lpp1'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial1'])) \ or (abs(self.run_card['lpp2'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial2'])): - if 'pythia_card.dat' in self.cards or 'pythia8_card.dat' in self.cards: - path_to_remove = None - if 'pythia_card.dat' in self.cards: - path_to_remove = self.paths['pythia'] - card_to_remove = 'pythia_card.dat' - elif 'pythia8_card.dat' in self.cards: - path_to_remove = self.paths['pythia8'] - card_to_remove = 'pythia8_card.dat' - if path_to_remove: - if 'partonshower' in self.run_card['bypass_check']: + if 'pythia_card.dat' in self.cards or 'pythia8_card.dat' in self.cards: + path_to_remove = None + if 'pythia_card.dat' in self.cards: + path_to_remove = self.paths['pythia'] + card_to_remove = 'pythia_card.dat' + elif 'pythia8_card.dat' in self.cards: + path_to_remove = self.paths['pythia8'] + card_to_remove = 'pythia8_card.dat' + if path_to_remove: + if 'partonshower' in self.run_card['bypass_check']: + logger.warning("forcing to keep parton-shower run while possibly not fully consistent... 
please be carefull") + else: + logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.') + os.remove(path_to_remove) + self.cards.remove(card_to_remove) + else: + logger.info('Remember that Parton-Shower are not yet ready for such proton component definition (HW implementation in progress).', '$MG:BOLD' ) + elif (abs(self.run_card['lpp1'])==3 and abs(self.run_card['lpp2'])==3): + if 'pythia8_card.dat' in self.cards: + if self.run_card['pdlabel'] == 'isronlyll': + if 'partonshower' not in self.run_card['bypass_check']: + # force that QED shower is on? + for param in ['TimeShower:QEDshowerByQ', 'TimeShower:QEDshowerByL', 'TimeShower:QEDshowerByGamma', 'SpaceShower:QEDshowerByQ', 'SpaceShower:QEDshowerByL']: + if param not in self.PY8Card or \ + (not self.PY8Card[param] and param.lower() not in self.PY8Card.user_set): + logger.warning('Activating QED shower: setting %s to True', param) + self.PY8Card[param] = True + elif 'partonshower' in self.run_card['bypass_check']: logger.warning("forcing to keep parton-shower run while possibly not fully consistent... please be carefull") - else: + else: logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.') - os.remove(path_to_remove) - self.cards.remove(card_to_remove) - else: - logger.info('Remember that Parton-Shower are not yet ready for such proton component definition (HW implementation in progress).', '$MG:BOLD' ) - elif (abs(self.run_card['lpp1'])==3 and abs(self.run_card['lpp2'])==3): - if 'pythia8_card.dat' in self.cards: - if self.run_card['pdlabel'] == 'isronlyll': - if 'partonshower' not in self.run_card['bypass_check']: - # force that QED shower is on? - for param in ['TimeShower:QEDshowerByQ', 'TimeShower:QEDshowerByL', 'TimeShower:QEDshowerByGamma', 'SpaceShower:QEDshowerByQ', 'SpaceShower:QEDshowerByL']: - if param not in self.PY8Card or \ - (not self.PY8Card[param] and param.lower() not in self.PY8Card.user_set): - logger.warning('Activating QED shower: setting %s to True', param) - self.PY8Card[param] = True - elif 'partonshower' in self.run_card['bypass_check']: - logger.warning("forcing to keep parton-shower run while possibly not fully consistent... please be carefull") - else: - logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.') - os.remove(self.paths['pythia8']) - self.cards.remove('pythia8_card.dat') + os.remove(self.paths['pythia8']) + self.cards.remove('pythia8_card.dat') ######################################################################## @@ -6514,7 +6614,8 @@ def check_card_consistency(self): #check relation between lepton PDF // dressed lepton collisions // ... 
- if abs(self.run_card['lpp1']) != 1 or abs(self.run_card['lpp2']) != 1: + if isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int) and \ + abs(self.run_card['lpp1']) != 1 or abs(self.run_card['lpp2']) != 1: if abs(self.run_card['lpp1']) == abs(self.run_card['lpp2']) == 3: # this can be dressed lepton or photon-flux if proc_charac['pdg_initial1'] in [[11],[-11]] and proc_charac['pdg_initial2'] in [[11],[-11]]: @@ -6732,7 +6833,11 @@ def write_card_param(self): """ write the param_card """ self.param_card.write(self.paths['param']) - + + def write_card_fo_card(self): + """ write the fo_card""" + self.fo_card.write_card_from_template(self.paths['FO_analyse'], self.paths['FO_analyse_default']) + @staticmethod def update_dependent(mecmd, me_dir, param_card, path ,timer=0, run_card=None, lhapdfconfig=None): @@ -7076,7 +7181,7 @@ def do_decay(self, line): #first find the particle particle = line.split('>')[0].strip() logger.info("change madspin_card to define the decay of %s: %s" %(particle, line.strip()), '$MG:BOLD') - particle = particle.replace('+','\+').replace('-','\-') + particle = particle.replace('+',r'\+').replace('-',r'\-') decay_pattern = re.compile(r"^\s*decay\s+%s\s*>[\s\w+-~]*?$" % particle, re.I+re.M) text= open(path).read() text = decay_pattern.sub('', text) @@ -7193,7 +7298,7 @@ def help_edit(self, prefix=True): logger.info( ' --clean remove all previously existing line in the file') logger.info( ' --comment_line="" comment all lines matching the regular expression') logger.info('') - logger.info(' Note: all regular-expression will be prefixed by ^\s*') + logger.info(r' Note: all regular-expression will be prefixed by ^\s*') logger.info('') logger.info( ' example: edit reweight --after_line="change mode\b" change model heft') logger.info( ' edit madspin --after_line="banner" change model XXXX') @@ -7314,7 +7419,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern=r'''replace_line=(?P["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[14:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[14:-1] for posline,l in enumerate(split): if re.search(pattern, l): break @@ -7344,7 +7449,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern=r'''comment_line=(?P["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[14:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[14:-1] nb_mod = 0 for posline,l in enumerate(split): if re.search(pattern, l): @@ -7366,7 +7471,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern=r'''before_line=(?P["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[13:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[13:-1] for posline,l in enumerate(split): if re.search(pattern, l): break @@ -7383,7 +7488,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern = r'''after_line=(?P["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[12:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[12:-1] for posline,l in enumerate(split): if re.search(pattern, l): break @@ -7527,16 +7632,19 @@ def open_file(self, answer): answer = 'plot' else: answer = self.cards[int(answer)-self.integer_bias] - + path = '' if 'madweight' in answer: answer = answer.replace('madweight', 'MadWeight') elif 
'MadLoopParams' in answer: answer = self.paths['ML'] elif 'pythia8_card' in answer: answer = self.paths['pythia8'] + elif 'FO_analyse' in answer: + path = self.paths['FO_analyse'] + answer = 'fo_card' if os.path.exists(answer): path = answer - else: + elif not os.path.exists(path): if not '.dat' in answer and not '.lhco' in answer: if answer != 'trigger': path = self.paths[answer] @@ -7595,7 +7703,8 @@ def reload_card(self, path): logger.error('Please re-open the file and fix the problem.') logger.warning('using the \'set\' command without opening the file will discard all your manual change') elif path == self.paths['run']: - self.run_card = banner_mod.RunCard(path) + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + self.run_card = banner_mod.RunCard(path) elif path == self.paths['shower']: self.shower_card = shower_card_mod.ShowerCard(path) elif path == self.paths['ML']: @@ -7614,6 +7723,8 @@ def reload_card(self, path): except: import internal.madweight.Cards as mwcards self.mw_card = mwcards.Card(path) + elif path == self.paths['FO_analyse']: + self.fo_card = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse']) else: logger.debug('not keep in sync: %s', path) return path @@ -7629,6 +7740,9 @@ def scanparamcardhandling(input_path=lambda obj: pjoin(obj.me_dir, 'Cards', 'par iteratorclass=param_card_mod.ParamCardIterator, summaryorder=lambda obj: lambda:None, check_card=lambda obj: CommonRunCmd.static_check_param_card, + run_card_scan=False, + run_card_input= lambda obj: pjoin(obj.me_dir, 'Cards', 'run_card.dat'), + run_card_iteratorclass=banner_mod.RunCardIterator, ): """ This is a decorator for customizing/using scan over the param_card (or technically other) This should be use like this: @@ -7678,7 +7792,60 @@ def __enter__(self): def __exit__(self, ctype, value, traceback ): self.iterator.write(self.path) - def decorator(original_fct): + def scan_over_run_card(original_fct, obj, *args, **opts): + + if isinstance(input_path, str): + card_path = run_card_input + else: + card_path = run_card_input(obj) + + run_card_iterator = run_card_iteratorclass(card_path) + orig_card = copy.deepcopy(run_card_iterator.run_card) + if not run_card_iterator.run_card.scan_set: + return original_fct(obj, *args, **opts) + + + with restore_iterator(orig_card, card_path): + # this with statement ensure that the original card is restore + # whatever happens inside those block + + if not hasattr(obj, 'allow_notification_center'): + obj.allow_notification_center = False + with misc.TMP_variable(obj, 'allow_notification_center', False): + orig_name = get_run_name(obj) + if not orig_name and args[1]: + orig_name = args[1][0] + args = (args[0], args[1][1:]) + #orig_name = "scan_%s" % len(obj.results) + + try: + os.mkdir(pjoin(obj.me_dir, 'Events', orig_name)) + except Exception: + pass + next_name = orig_name + "_00" + + for i,card in enumerate(run_card_iterator): + card.write(card_path) + # still have to check for the auto-wdith + #if i !=0: + next_name = run_card_iterator.get_next_name(next_name) + set_run_name(obj)(next_name) + try: + original_fct(obj, *args, **opts) + except ignoreerror as error: + run_card_iterator.store_entry(next_name, {'exception': error}) + else: + run_card_iterator.store_entry(next_name, store_for_scan(obj)(), run_card_path=card_path) + + #param_card_iterator.write(card_path) #-> this is done by the with statement + name = misc.get_scan_name(orig_name, next_name) + path = result_path(obj) % name + logger.info("write scan results in %s" % path ,'$MG:BOLD') + order = 
summaryorder(obj)() + run_card_iterator.write_summary(path, order=order) + + + def decorator(original_fct): def new_fct(obj, *args, **opts): if isinstance(input_path, str): @@ -7702,8 +7869,13 @@ def new_fct(obj, *args, **opts): if not param_card_iterator: #first run of the function - original_fct(obj, *args, **opts) - return + if run_card_scan: + scan_over_run_card(original_fct, obj, *args, **opts) + return + else: + #first run of the function + original_fct(obj, *args, **opts) + return with restore_iterator(param_card_iterator, card_path): # this with statement ensure that the original card is restore diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/extended_cmd.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/extended_cmd.py index 2f37070580..789976beee 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/extended_cmd.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/extended_cmd.py @@ -624,12 +624,12 @@ def complete(self, text, state): compfunc = self.completenames # correct wrong splittion with '\ ' - if line and begidx > 2 and line[begidx-2:begidx] == '\ ': + if line and begidx > 2 and line[begidx-2:begidx] == r'\ ': Ntext = line.split(os.path.sep)[-1] - self.completion_prefix = Ntext.rsplit('\ ', 1)[0] + '\ ' + self.completion_prefix = Ntext.rsplit(r'\ ', 1)[0] + r'\ ' to_rm = len(self.completion_prefix) - 1 Nbegidx = len(line.rsplit(os.path.sep, 1)[0]) + 1 - data = compfunc(Ntext.replace('\ ', ' '), line, Nbegidx, endidx) + data = compfunc(Ntext.replace(r'\ ', ' '), line, Nbegidx, endidx) self.completion_matches = [p[to_rm:] for p in data if len(p)>to_rm] # correct wrong splitting with '-'/"=" @@ -742,7 +742,7 @@ def path_completion(text, base_dir = None, only_dirs = False, completion += [prefix + f for f in ['.'+os.path.sep, '..'+os.path.sep] if \ f.startswith(text) and not prefix.startswith('.')] - completion = [a.replace(' ','\ ') for a in completion] + completion = [a.replace(' ',r'\ ') for a in completion] return completion @@ -1253,7 +1253,7 @@ def check_answer_in_input_file(self, question_instance, default, path=False, lin return possibility[0] if '=' in line and ' ' in line.strip(): leninit = len(line) - line,n = re.subn('\s*=\s*','=', line) + line,n = re.subn(r'\s*=\s*','=', line) if n and len(line) != leninit: return self.check_answer_in_input_file(question_instance, default, path=path, line=line) @@ -1311,7 +1311,7 @@ def nice_error_handling(self, error, line): if os.path.exists(self.debug_output): os.remove(self.debug_output) try: - super(Cmd,self).onecmd('history %s' % self.debug_output.replace(' ', '\ ')) + super(Cmd,self).onecmd('history %s' % self.debug_output.replace(' ', r'\ ')) except Exception as error: logger.error(error) @@ -2001,6 +2001,7 @@ def write_configuration(self, filepath, basefile, basedir, to_keep): text = "" has_mg5_path = False # Use local configuration => Need to update the path + already_written = set() for line in open(basefile): if '=' in line: data, value = line.split('=',1) @@ -2018,9 +2019,12 @@ def write_configuration(self, filepath, basefile, basedir, to_keep): comment = '' if key in to_keep: value = str(to_keep[key]) - else: + elif line not in already_written: + already_written.add(line) text += line continue + else: + continue if key == 'mg5_path': has_mg5_path = True try: @@ -2032,14 +2036,20 @@ def write_configuration(self, filepath, basefile, basedir, to_keep): # check if absolute path if not os.path.isabs(value): value = os.path.realpath(os.path.join(basedir, value)) - text += '%s = %s # %s \n' % (key, value, comment) + 
new_line = '%s = %s # %s \n' % (key, value, comment) + if new_line not in already_written: + text += new_line + already_written.add(new_line) for key in to_write: if key in to_keep: - text += '%s = %s \n' % (key, to_keep[key]) + new_line = '%s = %s \n' % (key, to_keep[key]) + if new_line not in already_written: + text += new_line if not MADEVENT and not has_mg5_path: - text += """\n# MG5 MAIN DIRECTORY\n""" - text += "mg5_path = %s\n" % MG5DIR + if "mg5_path = %s\n" % MG5DIR not in already_written: + text += """\n# MG5 MAIN DIRECTORY\n""" + text += "mg5_path = %s\n" % MG5DIR writer = open(filepath,'w') writer.write(text) @@ -2190,7 +2200,7 @@ def onecmd(self, line, **opt): raise def reask(self, reprint_opt=True): - pat = re.compile('\[(\d*)s to answer\]') + pat = re.compile(r'\[(\d*)s to answer\]') prev_timer = signal.alarm(0) # avoid timer if any if prev_timer: @@ -2991,7 +3001,7 @@ def question_formatting(self, nb_col = 80, lpotential_switch=0, lnb_key=0, key=None): - """should return four lines: + r"""should return four lines: 1. The upper band (typically /========\ 2. The lower band (typically \========/ 3. The line without conflict | %(nb)2d. %(descrip)-20s %(name)5s = %(switch)-10s | @@ -3239,13 +3249,13 @@ def create_question(self, help_text=True): data_to_format['conflict_switch'] = self.color_for_value(key,self.inconsistent_keys[key], consistency=False) if hidden_line: - f2 = re.sub('%(\((?:name|descrip|add_info)\)-?)(\d+)s', + f2 = re.sub(r'%(\((?:name|descrip|add_info)\)-?)(\d+)s', lambda x: '%%%s%ds' % (x.group(1),int(x.group(2))+9), f2) text.append(f2 % data_to_format) elif hidden_line: if not f3: - f3 = re.sub('%(\((?:name|descrip|add_info)\)-?)(\d+)s', + f3 = re.sub(r'%(\((?:name|descrip|add_info)\)-?)(\d+)s', lambda x: '%%%s%ds' % (x.group(1),int(x.group(2))+9), f1) text.append(f3 % data_to_format) diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/file_writers.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/file_writers.py index 41bff05276..526756129f 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/file_writers.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/file_writers.py @@ -36,10 +36,10 @@ class FileWriter(io.FileIO): supported_preprocessor_commands = ['if'] preprocessor_command_re=re.compile( - "\s*(?P%s)\s*\(\s*(?P.*)\s*\)\s*{\s*"\ + r"\s*(?P%s)\s*\(\s*(?P.*)\s*\)\s*{\s*"\ %('|'.join(supported_preprocessor_commands))) preprocessor_endif_re=re.compile(\ - "\s*}\s*(?Pelse)?\s*(\((?P.*)\))?\s*(?P{)?\s*") + r"\s*}\s*(?Pelse)?\s*(\((?P.*)\))?\s*(?P{)?\s*") class FileWriterError(IOError): """Exception raised if an error occurs in the definition @@ -191,15 +191,15 @@ class FortranWriterError(FileWriter.FileWriterError): pass # Parameters defining the output of the Fortran writer - keyword_pairs = {'^if.+then\s*$': ('^endif', 2), - '^type(?!\s*\()\s*.+\s*$': ('^endtype', 2), - '^do(?!\s+\d+)\s+': ('^enddo\s*$', 2), - '^subroutine': ('^end\s*$', 0), - '^module': ('^end\s*$', 0), - 'function': ('^end\s*$', 0)} - single_indents = {'^else\s*$':-2, - '^else\s*if.+then\s*$':-2} - number_re = re.compile('^(?P\d+)\s+(?P.*)') + keyword_pairs = {r'^if.+then\s*$': ('^endif', 2), + r'^type(?!\s*\()\s*.+\s*$': ('^endtype', 2), + r'^do(?!\s+\d+)\s+': (r'^enddo\s*$', 2), + '^subroutine': (r'^end\s*$', 0), + '^module': (r'^end\s*$', 0), + 'function': (r'^end\s*$', 0)} + single_indents = {r'^else\s*$':-2, + r'^else\s*if.+then\s*$':-2} + number_re = re.compile(r'^(?P\d+)\s+(?P.*)') line_cont_char = '$' comment_char = 'c' uniformcase = True #force everyting to be lower/upper case 
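The write_configuration hunk above threads an already_written set through every point where a configuration line is emitted, so that re-saving the file cannot duplicate entries (including the trailing mg5_path block). A simplified sketch of the same dedup-on-write pattern, with made-up option lines:

    def write_unique(lines):
        """Emit each configuration line at most once, keeping first-seen order."""
        already_written = set()
        text = ''
        for line in lines:
            if line not in already_written:
                already_written.add(line)
                text += line
        return text

    out = write_unique(['nb_core = 4\n', 'lhapdf = lhapdf-config\n', 'nb_core = 4\n'])
    assert out == 'nb_core = 4\nlhapdf = lhapdf-config\n'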
@@ -212,7 +212,7 @@ class FortranWriterError(FileWriter.FileWriterError): # Private variables __indent = 0 __keyword_list = [] - __comment_pattern = re.compile(r"^(\s*#|c$|(c\s+([^=]|$))|cf2py|c\-\-|c\*\*|!)", re.IGNORECASE) + __comment_pattern = re.compile(r"^(\s*#|c\$|c$|(c\s+([^=]|$))|cf2py|c\-\-|c\*\*|\s*!|!\$)", re.IGNORECASE) __continuation_line = re.compile(r"(?: )[$&]") def write_line(self, line): @@ -424,26 +424,20 @@ def count_number_of_quotes(self, line): i = i + 1 return len(splitline)-1 - def remove_routine(self, text, fct_names, formatting=True): - """write the incoming text but fully removing the associate routine/function - text can be a path to a file, an iterator, a string - fct_names should be a list of functions to remove + @staticmethod + def get_routine(text, fct_names, call_back=None): + """ + get the fortran function from a fortran file """ - f77_type = ['real*8', 'integer', 'double precision', 'logical'] - pattern = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ + pattern = re.compile(r'^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ % {'type':'|'.join(f77_type)}, re.I) - + + if isinstance(text, str): + text = text.split('\n') + + to_write=False removed = [] - if isinstance(text, str): - if '\n' in text: - text = text.split('\n') - else: - text = open(text) - if isinstance(fct_names, str): - fct_names = [fct_names] - - to_write=True for line in text: fct = pattern.findall(line) if fct: @@ -451,22 +445,38 @@ def remove_routine(self, text, fct_names, formatting=True): to_write = False else: to_write = True - if to_write: - if formatting: - if line.endswith('\n'): - line = line[:-1] - self.writelines(line) - else: - if not line.endswith('\n'): - line = '%s\n' % line - super(FileWriter,self).writelines(line) + if call_back: + call_back(line) else: removed.append(line) - + return removed + + def remove_routine(self, text, fct_names, formatting=True): + """write the incoming text but fully removing the associate routine/function + text can be a path to a file, an iterator, a string + fct_names should be a list of functions to remove + """ + + def call_back(line): + if formatting: + if line.endswith('\n'): + line = line[:-1] + self.writelines(line) + else: + if not line.endswith('\n'): + line = '%s\n' % line + super(FileWriter,self).writelines(line) + + return self.get_routine(text, fct_names, call_back) + +class FortranWriter90(FortranWriter): + + comment_char = ' !' 
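In the file_writers.py hunk above, remove_routine is refactored so the line-scanning loop lives in a reusable static get_routine(text, fct_names, call_back): the callback receives every line outside the named routines while the matched routine bodies are collected and returned, and remove_routine becomes a thin wrapper whose callback does the writing. A standalone sketch of that control flow (the Fortran type list is inlined and the regex trimmed):

    import re

    F77_TYPES = r'real\*8|integer|double precision|logical'
    ROUTINE_RE = re.compile(r'^\s+(?:SUBROUTINE|(?:%s)\s+function)\s+([a-zA-Z]\w*)'
                            % F77_TYPES, re.I)

    def get_routine(text, fct_names, call_back=None):
        """Collect the named routines; hand every other line to call_back."""
        if isinstance(text, str):
            text = text.split('\n')
        to_write = False
        removed = []
        for line in text:
            fct = ROUTINE_RE.findall(line)
            if fct:
                to_write = fct[0].lower() not in fct_names
            if to_write:
                if call_back:
                    call_back(line)
            else:
                removed.append(line)
        return removed

    kept = []
    src = "      SUBROUTINE KEEPME\n      END\n      SUBROUTINE DROPME\n      END"
    dropped = get_routine(src, ['dropme'], kept.append)
    assert any('DROPME' in l for l in dropped) and any('KEEPME' in l for l in kept)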
+ #=============================================================================== # CPPWriter #=============================================================================== @@ -497,50 +507,50 @@ class CPPWriterError(FileWriter.FileWriterError): '^private': standard_indent, '^protected': standard_indent} - spacing_patterns = [ - ('\s*\"\s*}', '\"'), - ('\s*,\s*', ', '), - ('\s*-\s*', ' - '), - ('([{(,=])\s*-\s*', '\g<1> -'), - ('(return)\s*-\s*', '\g<1> -'), - ('\s*\+\s*', ' + '), - ('([{(,=])\s*\+\s*', '\g<1> +'), - ('\(\s*', '('), - ('\s*\)', ')'), - ('\{\s*', '{'), - ('\s*\}', '}'), - ('\s*=\s*', ' = '), - ('\s*>\s*', ' > '), - ('\s*<\s*', ' < '), - ('\s*!\s*', ' !'), - ('\s*/\s*', '/'), - ('(?\s+>\s*', ' >> '), - ('<\s*double\s*>>\s*', ' > '), - ('\s*<\s+<\s*', ' << '), - ('\s*-\s+>\s*', '->'), - ('\s*=\s+=\s*', ' == '), - ('\s*!\s+=\s*', ' != '), - ('\s*>\s+=\s*', ' >= '), - ('\s*<\s+=\s*', ' <= '), - ('\s*&&\s*', ' && '), - ('\s*\|\|\s*', ' || '), - ('\s*{\s*}', ' {}'), - ('\s*;\s*', '; '), - (';\s*\}', ';}'), - (';\s*$}', ';'), - ('\s*<\s*([a-zA-Z0-9]+?)\s*>', '<\g<1>>'), - ('^#include\s*<\s*(.*?)\s*>', '#include <\g<1>>'), - ('(\d+\.{0,1}\d*|\.\d+)\s*[eE]\s*([+-]{0,1})\s*(\d+)', - '\g<1>e\g<2>\g<3>'), - ('\s+',' '), - ('^\s*#','#')] + spacing_patterns = [(r'\s*\"\s*}', '\"'), + (r'\s*,\s*', ', '), + (r'\s*-\s*', ' - '), + (r'([{(,=])\s*-\s*', r'\g<1> -'), + (r'(return)\s*-\s*', r'\g<1> -'), + (r'\s*\+\s*', ' + '), + (r'([{(,=])\s*\+\s*', r'\g<1> +'), + (r'\(\s*', '('), + (r'\s*\)', ')'), + (r'\{\s*', '{'), + (r'\s*\}', '}'), + (r'\s*=\s*', ' = '), + (r'\s*>\s*', ' > '), + (r'\s*<\s*', ' < '), + (r'\s*!\s*', ' !'), + (r'\s*/\s*', '/'), + (r'\s*\*\s*', ' * '), + (r'\s*-\s+-\s*', '-- '), + (r'\s*\+\s+\+\s*', '++ '), + (r'\s*-\s+=\s*', ' -= '), + (r'\s*\+\s+=\s*', ' += '), + (r'\s*\*\s+=\s*', ' *= '), + (r'\s*/=\s*', ' /= '), + (r'\s*>\s+>\s*', ' >> '), + (r'<\s*double\s*>>\s*', ' > '), + (r'\s*<\s+<\s*', ' << '), + (r'\s*-\s+>\s*', '->'), + (r'\s*=\s+=\s*', ' == '), + (r'\s*!\s+=\s*', ' != '), + (r'\s*>\s+=\s*', ' >= '), + (r'\s*<\s+=\s*', ' <= '), + (r'\s*&&\s*', ' && '), + (r'\s*\|\|\s*', ' || '), + (r'\s*{\s*}', ' {}'), + (r'\s*;\s*', '; '), + (r';\s*\}', ';}'), + (r';\s*$}', ';'), + (r'\s*<\s*([a-zA-Z0-9]+?)\s*>', r'<\g<1>>'), + (r'^#include\s*<\s*(.*?)\s*>', r'#include <\g<1>>'), + (r'(\d+\.{0,1}\d*|\.\d+)\s*[eE]\s*([+-]{0,1})\s*(\d+)', + r'\g<1>e\g<2>\g<3>'), + (r'\s+',' '), + (r'^\s*#','#')] + spacing_re = dict([(key[0], re.compile(key[0])) for key in \ spacing_patterns]) diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/gen_crossxhtml.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/gen_crossxhtml.py index d58ec573bc..681bf9d09b 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/gen_crossxhtml.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/gen_crossxhtml.py @@ -648,7 +648,7 @@ def recreate(self, banner): if run_card['ickkw'] != 0: #parse the file to have back the information pythia_log = misc.BackRead(pjoin(path, '%s_pythia.log' % tag)) - pythiare = re.compile("\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") + pythiare = re.compile(r"\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") for line in pythia_log: info = pythiare.search(line) if not info: @@ -1623,7 +1623,7 @@ def get_html(self, runresults): elif self.debug: text = str(self.debug).replace('. ','.
') if 'http' in text: - pat = re.compile('(http[\S]*)') + pat = re.compile(r'(http[\S]*)') text = pat.sub(r' here ', text) debug = '
<br><br><font color=red>%s<BR>%s</font>
' % \ (self.debug.__class__.__name__, text) diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/gen_ximprove.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/gen_ximprove.py index c86da36a05..415ecc9de0 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/gen_ximprove.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/gen_ximprove.py @@ -158,8 +158,11 @@ def get_helicity(self, to_submit=True, clean=True): (stdout, _) = p.communicate(''.encode()) stdout = stdout.decode('ascii',errors='ignore') if stdout: - nb_channel = max([math.floor(float(d)) for d in stdout.split()]) + lines = stdout.strip().split('\n') + nb_channel = max([math.floor(float(d)) for d in lines[-1].split()]) else: + if os.path.exists(pjoin(self.me_dir, 'error')): + os.remove(pjoin(self.me_dir, 'error')) for matrix_file in misc.glob('matrix*orig.f', Pdir): files.cp(matrix_file, matrix_file.replace('orig','optim')) P_zero_result.append(Pdir) @@ -297,12 +300,14 @@ def get_helicity(self, to_submit=True, clean=True): bad_amps_perhel = [] if __debug__: mtext = open(matrix_file).read() - nb_amp = int(re.findall('PARAMETER \(NGRAPHS=(\d+)\)', mtext)[0]) - logger.debug('nb_hel: %s zero amp: %s bad_amps_hel: %s/%s', len(good_hels),len(bad_amps),len(bad_amps_perhel), len(good_hels)*nb_amp ) + nb_amp = int(re.findall(r'PARAMETER \(NGRAPHS=(\d+)\)', mtext)[0]) + logger.debug('(%s) nb_hel: %s zero amp: %s bad_amps_hel: %s/%s', split_file[-1], len(good_hels),len(bad_amps),len(bad_amps_perhel), len(good_hels)*nb_amp ) if len(good_hels) == 1: files.cp(matrix_file, matrix_file.replace('orig','optim')) continue # avoid optimization if onlye one helicity - recycler = hel_recycle.HelicityRecycler(good_hels, bad_amps, bad_amps_perhel) + + gauge = self.cmd.proc_characteristics['gauge'] + recycler = hel_recycle.HelicityRecycler(good_hels, bad_amps, bad_amps_perhel, gauge=gauge) # In case of bugs you can play around with these: recycler.hel_filt = self.run_card['hel_filtering'] recycler.amp_splt = self.run_card['hel_splitamp'] @@ -1299,7 +1304,7 @@ def get_job_for_event(self): 'script_name': 'unknown', 'directory': C.name, # need to be change for splitted job 'P_dir': C.parent_name, - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # needed for RO gridpack 'offset': 1, # need to be change for splitted job 'nevents': nevents, 'maxiter': self.max_iter, @@ -1387,7 +1392,7 @@ def create_ajob(self, template, jobs, write_dir=None): break info = jobs[j] info['script_name'] = 'ajob%i' % script_number - info['keeplog'] = 'false' + info['keeplog'] = 'false' if self.run_card['keep_log'] != 'debug' else 'true' if "base_directory" not in info: info["base_directory"] = "./" fsock.write(template_text % info) @@ -1456,7 +1461,7 @@ def get_job_for_precision(self): 'script_name': 'unknown', 'directory': C.name, # need to be change for splitted job 'P_dir': C.parent_name, - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # used for RO gridpack 'offset': 1, # need to be change for splitted job 'nevents': nevents, 'maxiter': self.max_iter, @@ -1916,7 +1921,7 @@ def get_job_for_event(self): 'directory': C.name, # need to be change for splitted job 'P_dir': os.path.basename(C.parent_name), 'offset': 1, # need to be change for splitted job - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # use for RO gridpack 'nevents': 
nevents, #int(nevents*self.gen_events_security)+1, 'maxiter': self.max_iter, 'miniter': self.min_iter, diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/hel_recycle.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/hel_recycle.py index dfa45d5d20..6c86611f68 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/hel_recycle.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/hel_recycle.py @@ -383,7 +383,7 @@ def get_number(cls, *args): class HelicityRecycler(): '''Class for recycling helicity''' - def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[]): + def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[], gauge='U'): External.good_hel = [] External.nhel_lines = '' @@ -427,6 +427,7 @@ def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[]): self.all_hel = [] self.hel_filt = True + self.gauge = gauge def set_input(self, file): if 'born_matrix' in file: @@ -612,7 +613,7 @@ def unfold_helicities(self, line, nature): def apply_amps(self, line, new_objs): if self.amp_splt: - return split_amps(line, new_objs) + return split_amps(line, new_objs, gauge=self.gauge) else: return apply_args(line, [i.args for i in new_objs]) @@ -785,7 +786,7 @@ def apply_args(old_line, all_the_args): return ''.join(new_lines) -def split_amps(line, new_amps): +def split_amps(line, new_amps, gauge): if not new_amps: return '' fct = line.split('(',1)[0].split('_0')[0] @@ -841,34 +842,31 @@ def split_amps(line, new_amps): spin = fct.split(None,1)[1][to_remove] lines.append('%sP1N_%s(%s)' % (fct, to_remove+1, ', '.join(args))) - hel, iamp = re.findall('AMP\((\d+),(\d+)\)', amp_result)[0] + hel, iamp = re.findall(r'AMP\((\d+),(\d+)\)', amp_result)[0] hel_calculated.append(hel) #lines.append(' %(result)s = TMP(3) * W(3,%(w)s) + TMP(4) * W(4,%(w)s)+' # % {'result': amp_result, 'w': windex}) #lines.append(' & TMP(5) * W(5,%(w)s)+TMP(6) * W(6,%(w)s)' # % {'result': amp_result, 'w': windex}) - if spin in "VF": - lines.append(""" call CombineAmp(%(nb)i, - & (/%(hel_list)s/), - & (/%(w_list)s/), - & TMP, W, AMP(1,%(iamp)s))""" % - {'nb': len(sub_amps), - 'hel_list': ','.join(hel_calculated), - 'w_list': ','.join(windices), - 'iamp': iamp - }) + if spin == "F" or ( spin == "V" and gauge !='FD'): + suffix = '' elif spin == "S": - lines.append(""" call CombineAmpS(%(nb)i, - &(/%(hel_list)s/), - & (/%(w_list)s/), - & TMP, W, AMP(1,%(iamp)s))""" % - {'nb': len(sub_amps), - 'hel_list': ','.join(hel_calculated), - 'w_list': ','.join(windices), - 'iamp': iamp - }) + suffix = 'S' + elif spin == "V" and gauge == "FD": + suffix = "FD" else: - raise Exception("split amp are not supported for spin2 and 3/2") + raise Exception("split amp not supported for spin2, 3/2") + + lines.append(""" call CombineAmp%(suffix)s(%(nb)i, + & (/%(hel_list)s/), + & (/%(w_list)s/), + & TMP, W, AMP(1,%(iamp)s))""" % {'suffix':suffix, + 'nb': len(sub_amps), + 'hel_list': ','.join(hel_calculated), + 'w_list': ','.join(windices), + 'iamp': iamp + }) + #lines.append('') return '\n'.join(lines) diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/histograms.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/histograms.py index 98f22c4c3a..9931127f66 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/histograms.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/histograms.py @@ -632,34 +632,34 @@ class HwU(Histogram): # than necessary because the HwU standard allows for spaces from within # the name of a weight weight_header_re = re.compile( - '&\s*(?P(\S|(\s(?!\s*(&|$))))+)(\s(?!(&|$)))*') + 
r'&\s*(?P(\S|(\s(?!\s*(&|$))))+)(\s(?!(&|$)))*') # ================================ # Histo weight specification RE's # ================================ # The start of a plot - histo_start_re = re.compile('^\s*\s*(?P\d+)\s*"\s*'+ - '(?P(\S|(\s(?!\s*")))+)\s*"\s*$') + histo_start_re = re.compile(r'^\s*\s*(?P\d+)\s*"\s*'+ + r'(?P(\S|(\s(?!\s*")))+)\s*"\s*$') # A given weight specifier - a_float_re = '[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' - histo_bin_weight_re = re.compile('(?P%s|NaN)'%a_float_re,re.IGNORECASE) - a_int_re = '[\+|-]?\d+' + a_float_re = r'[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' + histo_bin_weight_re = re.compile(r'(?P%s|NaN)'%a_float_re,re.IGNORECASE) + a_int_re = r'[\+|-]?\d+' # The end of a plot histo_end_re = re.compile(r'^\s*<\\histogram>\s*$') # A scale type of weight - weight_label_scale = re.compile('^\s*mur\s*=\s*(?P%s)'%a_float_re+\ - '\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) - weight_label_PDF = re.compile('^\s*PDF\s*=\s*(?P\d+)\s*$') - weight_label_PDF_XML = re.compile('^\s*pdfset\s*=\s*(?P\d+)\s*$') - weight_label_TMS = re.compile('^\s*TMS\s*=\s*(?P%s)\s*$'%a_float_re) - weight_label_alpsfact = re.compile('^\s*alpsfact\s*=\s*(?P%s)\s*$'%a_float_re, + weight_label_scale = re.compile(r'^\s*mur\s*=\s*(?P%s)'%a_float_re+\ + r'\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) + weight_label_PDF = re.compile(r'^\s*PDF\s*=\s*(?P\d+)\s*$') + weight_label_PDF_XML = re.compile(r'^\s*pdfset\s*=\s*(?P\d+)\s*$') + weight_label_TMS = re.compile(r'^\s*TMS\s*=\s*(?P%s)\s*$'%a_float_re) + weight_label_alpsfact = re.compile(r'^\s*alpsfact\s*=\s*(?P%s)\s*$'%a_float_re, re.IGNORECASE) - weight_label_scale_adv = re.compile('^\s*dyn\s*=\s*(?P%s)'%a_int_re+\ - '\s*mur\s*=\s*(?P%s)'%a_float_re+\ - '\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) - weight_label_PDF_adv = re.compile('^\s*PDF\s*=\s*(?P\d+)\s+(?P\S+)\s*$') + weight_label_scale_adv = re.compile(r'^\s*dyn\s*=\s*(?P%s)'%a_int_re+\ + r'\s*mur\s*=\s*(?P%s)'%a_float_re+\ + r'\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) + weight_label_PDF_adv = re.compile(r'^\s*PDF\s*=\s*(?P\d+)\s+(?P\S+)\s*$') class ParseError(MadGraph5Error): @@ -926,7 +926,7 @@ def get_HwU_source(self, print_header=True): res.append(' '.join('%+16.7e'%wgt for wgt in list(bin.boundaries))) res[-1] += ' '.join('%+16.7e'%bin.wgts[key] for key in self.bins.weight_labels if key not in ['central','stat_error']) - res.append('<\histogram>') + res.append(r'<\histogram>') return res def output(self, path=None, format='HwU', print_header=True): @@ -1149,6 +1149,8 @@ def parse_one_histo_from_stream(self, stream, all_weight_header, boundaries = [0.0,0.0] for j, weight in \ enumerate(HwU.histo_bin_weight_re.finditer(line_bin)): + if (j == len(weight_header)): + continue if j == len(all_weight_header): raise HwU.ParseError("There is more bin weights"+\ " specified than expected (%i)"%len(weight_header)) @@ -1803,7 +1805,7 @@ def parse_histos_from_PY8_XML_stream(self, stream, run_id=None, # Filter empty weights coming from the split weight_label_list = [wgt.strip() for wgt in str(selected_run_node.getAttribute('header')).split(';') if - not re.match('^\s*$',wgt)] + not re.match(r'^\s*$',wgt)] ordered_weight_label_list = [w for w in weight_label_list if w not\ in ['xmin','xmax']] # Remove potential repetition of identical weight labels @@ -1827,7 +1829,7 @@ def parse_histos_from_PY8_XML_stream(self, stream, run_id=None, all_weights = [] for wgt_position, wgt_label in \ enumerate(str(selected_run_node.getAttribute('header')).split(';')): - if not 
re.match('^\s*$',wgt_label) is None: + if not re.match(r'^\s*$',wgt_label) is None: continue all_weights.append({'POSITION':wgt_position}) for wgt_item in wgt_label.strip().split('_'): @@ -2714,7 +2716,7 @@ def ratio_no_correlations(wgtsA, wgtsB): # First the global gnuplot header for this histogram group global_header =\ -""" +r""" ################################################################################ ### Rendering of the plot titled '%(title)s' ################################################################################ @@ -2862,9 +2864,9 @@ def ratio_no_correlations(wgtsA, wgtsB): major_title = ', '.join(major_title) if not mu[0] in ['none',None]: - major_title += ', dynamical\_scale\_choice=%s'%mu[0] + major_title += r', dynamical\_scale\_choice=%s'%mu[0] if not pdf[0] in ['none',None]: - major_title += ', PDF=%s'%pdf[0].replace('_','\_') + major_title += ', PDF=%s'%pdf[0].replace('_',r'\_') # Do not show uncertainties for individual jet samples (unless first # or specified explicitely and uniquely) @@ -2937,7 +2939,7 @@ def ratio_no_correlations(wgtsA, wgtsB): plot_lines.append( "'%s' index %d using (($1+$2)/2):%d ls %d title '%s'"\ %(HwU_name,block_position+i,mu_var+3,color_index,\ -'%s dynamical\_scale\_choice=%s' % (title,mu[j]))) +r'%s dynamical\_scale\_choice=%s' % (title,mu[j]))) # And now PDF_variation if available if not PDF_var_pos is None: for j,PDF_var in enumerate(PDF_var_pos): @@ -2947,7 +2949,7 @@ def ratio_no_correlations(wgtsA, wgtsB): plot_lines.append( "'%s' index %d using (($1+$2)/2):%d ls %d title '%s'"\ %(HwU_name,block_position+i,PDF_var+3,color_index,\ -'%s PDF=%s' % (title,pdf[j].replace('_','\_')))) +'%s PDF=%s' % (title,pdf[j].replace('_',r'\_')))) # Now add the uncertainty lines, those not using a band so that they # are not covered by those using a band after we reverse plo_lines diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/launch_plugin.py index 7b10bedcef..9ec09eb71d 100644 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/launch_plugin.py @@ -98,6 +98,7 @@ def default_setup(self): self['vector_size'] = 16 # already setup in default class (just change value) self['aloha_flag'] = '--fast-math' self['matrix_flag'] = '-O3' + self['limhel'] = 0 self.display_block.append('simd') self.display_block.append('psoptim') diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/lhe_parser.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/lhe_parser.py index eee9ba4522..f6e47956cd 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/lhe_parser.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/lhe_parser.py @@ -7,6 +7,7 @@ import numbers import math import time +import copy import os import shutil import sys @@ -101,7 +102,7 @@ def __init__(self, line=None, event=None): self.rwgt = 0 return - + self.event = event if event is not None: self.event_id = len(event) #not yet in the event @@ -1066,6 +1067,8 @@ def define_init_banner(self, wgt, lha_strategy, proc_charac=None): #special case for 1>N init_information = run_card.get_banner_init_information() event = next(self) + if not len(event): #if parse-momenta was false we have to parse the first event + event = Event(str(event)) init_information["idbmup1"] = event[0].pdg init_information["ebmup1"] = event[0].mass init_information["idbmup2"] = 0 @@ -1149,6 +1152,7 @@ def initialize_unweighting(self, getwgt, trunc_error): nb_keep = max(20, int(nb_event*trunc_error*15)) new_wgt = 
new_wgt[-nb_keep:] if nb_event == 0: + misc.sprint(i,f) raise Exception # store the information self.initial_nb_events[i] = nb_event @@ -1203,7 +1207,6 @@ def unweight(self, outputpath, get_wgt, **opts): (stop to write event when target is reached) """ - if isinstance(get_wgt, (str,six.text_type)): unwgt_name =get_wgt def get_wgt_multi(event): @@ -1483,12 +1486,12 @@ def reorder_mother_child(self): particle.mother2 -= 1 # re-call the function for the next potential change return self.reorder_mother_child() - - - - - + + + + + def parse_reweight(self): """Parse the re-weight information in order to return a dictionary {key: value}. If no group is define group should be '' """ @@ -1522,6 +1525,37 @@ def parse_nlo_weight(self, real_type=(1,11), threshold=None): threshold=threshold) return self.nloweight + def get_fks_pair(self, real_type=(1,11), threshold=None): + """ Gives the fks pair labels""" + start, stop = self.tag.find(''), self.tag.find('') + if start != -1 != stop: + text = self.tag[start+8:stop] + all_line = text.split('\n') + text = text.lower().replace('d','e') + all_line = text.split('\n') + for line in all_line: + data = line.split() + if len(data)>16: + wgt = OneNLOWeight(line, real_type=real_type) + return wgt.to_merge_pdg,wgt.nexternal + + def get_born_momenta(self,real_type=(1,11), threshold=None): + """ Gets the underlying n+1 body kinematics""" + start, stop = self.tag.find(''), self.tag.find('') + if start != -1 != stop: + text = self.tag[start+8:stop] + text = text.lower().replace('d','e') + all_line = text.split('\n') + for line in all_line: + data = line.split() + if len(data)>16: + wgt = OneNLOWeight(line, real_type=real_type) + nexternal = wgt.nexternal + real_momenta = all_line[2:2+nexternal] + return real_momenta + + + def rewrite_nlo_weight(self, wgt=None): """get the string associate to the weight""" @@ -1558,11 +1592,11 @@ def parse_lo_weight(self): return self.loweight if not hasattr(Event, 'loweight_pattern'): - Event.loweight_pattern = re.compile('''\s*(?P\d+)\s+(?P[\d.e+-]+)\s*\s*\n\s* - \s*(?P[\s\d.+-e]+)\s*\s*\n\s* - 1|2)["']?\>\s*(?P[\s\d.e+-]*)\s*\s*\n\s* - 1|2)["']?\>\s*(?P[\s\d.e+-]*)\s*\s*\n\s* - \s*(?P[\d.e+-]*)\s* + Event.loweight_pattern = re.compile('''\\s*(?P\\d+)\\s+(?P[\\d.e+-]+)\\s*\\s*\n\\s* + \\s*(?P[\\s\\d.+-e]+)\\s*\\s*\n\\s* + 1|2)["']?\\>\\s*(?P[\\s\\d.e+-]*)\\s*\\s*\n\\s* + 1|2)["']?\\>\\s*(?P[\\s\\d.e+-]*)\\s*\\s*\n\\s* + \\s*(?P[\\d.e+-]*)\\s* ''',re.X+re.I+re.M) start, stop = self.tag.find(''), self.tag.find('') @@ -1615,7 +1649,7 @@ def parse_matching_scale(self): self.matched_scale_data = [] - pattern = re.compile("") + pattern = re.compile(r"") data = re.split(pattern,self.tag) if len(data) == 1: return [] @@ -1623,7 +1657,7 @@ def parse_matching_scale(self): tmp = {} start,content, end = data self.tag = "%s%s" % (start, end) - pattern = re.compile("pt_clust_(\d*)=\"([\de+-.]*)\"") + pattern = re.compile("pt_clust_(\\d*)=\"([\\de+-.]*)\"") for id,value in pattern.findall(content): tmp[int(id)] = float(value) for i in range(1, len(self)+1): @@ -1647,7 +1681,7 @@ def parse_syscalc_info(self): return self.syscalc_data pattern = re.compile("|") - pattern2 = re.compile("<(?P[\w]*)(?:\s*(\w*)=[\"'](.*)[\"']\s*|\s*)>(.*)") + pattern2 = re.compile("<(?P[\\w]*)(?:\\s*(\\w*)=[\"'](.*)[\"']\\s*|\\s*)>(.*)") data = re.split(pattern,self.tag) if len(data) == 1: return [] @@ -1850,6 +1884,240 @@ def get_decay(self, pdg_code=0, event_id=None): return new_event + + def set_initial_mass_to_zero(self): + """set the masses of the initial particles 
to zero, by reshuffling the respective momenta + Works only in the **partonic** com frame, so the event must be boosted to such frame + before calling the function + """ + + if not misc.equal(self[0].px, 0) or not misc.equal(self[1].px, 0) or \ + not misc.equal(self[0].py, 0) or not misc.equal(self[1].py, 0) or \ + not misc.equal(self[0].pz, - self[1].pz, zero_limit=False): + misc.sprint(self[0]) + misc.sprint(self[1]) + raise Exception('momenta should be in the partonic center of mass frame') + + self[0].mass = 0. + self[1].mass = 0. + tot_E=0. + for ip,part in enumerate(self): + if part.status == 1 : + tot_E += part.E + if (self[0].pz > 0. and self[1].pz < 0): + self[0].set_momentum(FourMomentum([tot_E/2., 0., 0., tot_E/2.])) + self[1].set_momentum(FourMomentum([tot_E/2., 0., 0., -tot_E/2.])) + elif (self[0].pz < 0. and self[1].pz > 0): + self[0].set_momentum(FourMomentum([tot_E/2., 0., 0., -tot_E/2.])) + self[1].set_momentum(FourMomentum([tot_E/2., 0., 0., tot_E/2.])) + else: + logger.critical('ERROR: two incoming partons not back.-to-back') + + def set_final_jet_mass_to_zero(self): + """set the final light particle masses to zero + """ + + for ip,part in enumerate(self): + if ((abs(part.pid) <= 5) or (abs(part.pid) == 11) or (abs(part.pid) == 12)) and (part.status == 1): + part.mass = 0. + E_1_new = math.sqrt(part.mass**2 + part.px**2 + part.py**2 + part.pz**2) + part.set_momentum(FourMomentum([E_1_new, part.px, part.py, part.pz])) + + + + def merge_particles_kinematics(self, i,j, moth): + """Map to an underlying n-body kinematics for two given + particles i,j to be merged and a resulting moth""" + """ note! kinematics (and id) mapping only! """ + + recoil = True + fks_type = False + + if recoil and not fks_type: + if (i == moth[0].get('number')-1): + fks_i = i + fks_j = j + elif (j == moth[0].get('number')-1): + fks_i = j + fks_j = i + to_remove = fks_j + + merge_i = self[fks_i] + merge_j = self[fks_j] + + i_4mom = FourMomentum(merge_i) + j_4mom = FourMomentum(merge_j) + if (fks_i <= 1): + sign1 = -1.0 + else: + sign1 = 1.0 + mother_4mom = i_4mom + sign1*j_4mom + + new_event = copy.deepcopy(self) + + self[fks_i].pid = moth[0]['id'] + self[fks_i].set_momentum(mother_4mom) + + if fks_i <= 1: # initial-state recoil + new_p = FourMomentum() + for ip,part in enumerate(self): + if (ip != fks_i and ip != fks_j and ip >= 2): + new_p += part + + if fks_i == 0: + self[1].set_momentum(new_p - FourMomentum(self[0])) + elif fks_i == 1: + self[0].set_momentum(new_p - FourMomentum(self[1])) + + pz_1_new = self.recoil_eq(self[0],self[1]) + pz_2_new = self[0].pz + self[1].pz - pz_1_new + E_1_new = math.sqrt(self[0].mass**2 + self[0].px**2 + self[0].py**2 + pz_1_new **2) + E_2_new = math.sqrt(self[1].mass**2 + self[1].px**2 + self[1].py**2 + pz_2_new **2) + self[0].set_momentum(FourMomentum([E_1_new,self[0].px,self[0].py,pz_1_new])) + self[1].set_momentum(FourMomentum([E_2_new,self[1].px,self[1].py,pz_2_new])) + self.pop(to_remove) + + if fks_i > 1: # final-state recoil + + # Re-scale the energy of fks_i to make it on-shell + for ip,part in enumerate(self): + if (ip == fks_i): + part.E = math.sqrt(part.mass**2 + part.px**2 + part.py**2 + part.pz**2) + new_p.E = part.E + + # Find the overall energy in the final state + new_p.E = 0.0 + for ip,part in enumerate(self): + if (ip != fks_j and ip >= 2): + new_p.E += part.E + + # Use one of the initial states to absorb the energy change in the final state + self[1].set_momentum(FourMomentum([new_p.E-self[0].E,self[1].px,self[1].py,self[1].pz])) + + # Change 
the initial state pz and E + pz_1_new = self.recoil_eq(self[0],self[1]) + pz_2_new = self[0].pz + self[1].pz - pz_1_new + E_1_new = math.sqrt(self[0].mass**2 + self[0].px**2 + self[0].py**2 + pz_1_new **2) + E_2_new = math.sqrt(self[1].mass**2 + self[1].px**2 + self[1].py**2 + pz_2_new **2) + self[0].set_momentum(FourMomentum([E_1_new,self[0].px,self[0].py,pz_1_new])) + self[1].set_momentum(FourMomentum([E_2_new,self[1].px,self[1].py,pz_2_new])) + self.pop(to_remove) + + elif fks_type and not recoil: + ## Do it in a more FKS-style + if (i == moth[0].get('number')-1): + fks_i = i + fks_j = j + elif (j == moth[0].get('number')-1): + fks_i = j + fks_j = i + to_remove = fks_j + new_event = copy.copy(event) + + if fks_i <= 1: # initial-state recoil + + # First boost to partonic CM frame + q = FourMomentum(self[0])+FourMomentum(self[1]) + for ip,part in enumerate(self): + vec = FourMomentum(part) + self[ip].set_momentum(vec.zboost(pboost=q)) + + k_tot = FourMomentum([self[0].E+self[1].E-self[fks_j].E,self[0].px+self[1].px-self[fks_j].px,\ + self[0].py+self[1].py-self[fks_j].py,self[0].pz+self[1].pz-self[fks_j].pz]) + + final = FourMomentum([0,0,0,0]) + for ip,part in enumerate(self): + vec = FourMomentum([part.E,part.px,part.py,part.pz]) + if (ip != fks_i and ip != fks_j and ip >= 2): + final = final + vec + + s = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz])**2 + ksi = self[fks_j].E/(math.sqrt(s)/2.0) + y = self[fks_j].pz/self[fks_j].E + + self[0].pz = self[0].pz * math.sqrt(1.0-ksi)*math.sqrt((2.0-ksi*(1.0+y))/((2.0-ksi*(1.0-y)))) + self[0].E = math.sqrt(self[0].mass**2 + self[0].pz**2) + self[1].pz = self[1].pz * math.sqrt(1.0-ksi)*math.sqrt((2.0-ksi*(1.0-y))/((2.0-ksi*(1.0+y)))) + self[1].E = math.sqrt(self[1].mass**2 + self[1].pz**2) + + final = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz]) + + k_tot_1 = k_tot.zboost(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + k_tot_2 = k_tot_1.pt_boost(pboost=FourMomentum([k_tot_1.E,k_tot_1.px,k_tot_1.py,k_tot_1.pz])) + k_tot_3 = k_tot_2.zboost_inv(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + + for ip,part in enumerate(self): + if (ip >= 2): + vec = FourMomentum([part.E,part.px,part.py,part.pz]) + vec2 = vec.zboost(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + vec3 = vec2.pt_boost(pboost=FourMomentum([k_tot_1.E,k_tot_1.px,k_tot_1.py,k_tot_1.pz])) + vec_new = vec3.zboost_inv(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + self[ip].set_momentum(FourMomentum([vec_new.E,vec_new.px,vec_new.py,vec_new.pz])) + + self.pop(to_remove) + + else: # final-state recoil + q = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz]) + + for ip,part in enumerate(self): + vec = FourMomentum([part.E,part.px,part.py,part.pz]) + self[ip].set_momentum(vec.zboost(pboost=q)) + + q = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz]) + + k = FourMomentum([self[fks_i].E+self[fks_j].E,self[fks_i].px+self[fks_j].px,\ + self[fks_i].py+self[fks_j].py,self[fks_i].pz+self[fks_j].pz]) + + k_rec = FourMomentum([0,0,0,0]) + for ip,part in enumerate(self): + if ip >= 2 and ip != fks_i and ip != fks_j: # add only final-states to the recoil and not the FKS pair + k_rec = k_rec + FourMomentum([part.E,part.px,part.py,part.pz]) + + k_mom = math.sqrt(k_rec.px**2 + k_rec.py**2 + k_rec.pz**2) + beta = (q**2 - 
(k_rec.E+k_mom)**2)/(q**2 + (k_rec.E+k_mom)**2) + for ip,part in enumerate(self): + if ip >= 2 and ip != fks_i and ip != fks_j: + vec = FourMomentum([self[ip].E,self[ip].px,self[ip].py,self[ip].pz]) + self[ip].set_momentum(vec.boost_beta(beta,k_rec)) + if ip == fks_i: + self[ip].set_momentum(q - k_rec.boost_beta(beta,k_rec)) + self.pop(to_remove) + else: + logger.info('Error in Sudakov Born mapping: no recoil scheme found!') + + def recoil_eq(self,part1, part2): + """ In general, solves the equation + E1 + E2 = K + p1 + p2 = c + E1^2 - p1^2 = a + E2^2 - p2^2 = b + and returns p1 + """ + thresh = 1e-6 + import random + a = part1.mass**2 + part1.px**2 + part1.py**2 + b = part2.mass**2 + part2.px**2 + part2.py**2 + c = part1.pz + part2.pz + K = part1.E + part2.E + K2 = K**2 + sol1 = (-a*c + b*c + c**3 - c*K2 - math.sqrt(K2*(a**2 + (b + c**2 - K2)**2 - 2*a*(b - c**2 + K2))))/(2*(c**2-K2)) + sol2 = (-a*c + b*c + c**3 - c*K2 + math.sqrt(K2*(a**2 + (b + c**2 - K2)**2 - 2*a*(b - c**2 + K2))))/(2*(c**2-K2)) + + if abs(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2) - (math.sqrt(a+sol2**2) + math.sqrt(b+(c-sol2)**2))) > thresh: + logger.critical('Error in recoil_eq solver 1') + logger.critical(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2)) + logger.critical(math.sqrt(a+sol2**2) + math.sqrt(b+(c-sol2)**2)) + if abs(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2) - K) > thresh: + logger.critical('Error in recoil_eq solver 2') + logger.critical(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2)) + logger.critical(K) + return sol1 + + def boost(self, filter=None): """modify the current event to boost it according to the current filter""" if filter is None: @@ -1861,7 +2129,7 @@ def boost(self, filter=None): if list(filter(p)): pboost += p else: - pboost = FourMomentum(pboost) + pboost = FourMomentum(filter) # change sign of three-component due to helas convention pboost.px *=-1 @@ -1877,7 +2145,7 @@ def check(self): """check various property of the events""" # check that relative error is under control - threshold = 1e-6 + threshold = 1e-4 #1. Check that the 4-momenta are conserved E, px, py, pz = 0,0,0,0 @@ -1920,7 +2188,50 @@ def check(self): self.check_color_structure() #3. check mass - + + def check_kinematics_only(self): + """check various property of the events - only kinematics""" + + # check that relative error is under control + threshold = 1e-3 + + #1. 
Check that the 4-momenta are conserved + E, px, py, pz = 0,0,0,0 + absE, abspx, abspy, abspz = 0,0,0,0 + for particle in self: + coeff = 1 + if particle.status == -1: + coeff = -1 + elif particle.status != 1: + continue + E += coeff * particle.E + absE += abs(particle.E) + px += coeff * particle.px + py += coeff * particle.py + pz += coeff * particle.pz + abspx += abs(particle.px) + abspy += abs(particle.py) + abspz += abs(particle.pz) + # check mass + fourmass = FourMomentum(particle).mass + + if particle.mass and (abs(particle.mass) - fourmass)/ abs(particle.mass) > threshold: + logger.critical(self) + raise Exception("Do not have correct mass. lhe: %s momentum: %s (error at %s)" % (particle.mass, fourmass, (abs(particle.mass) - fourmass)/ abs(particle.mass))) + + if abs(E/absE) > threshold: + logger.critical(self) + raise Exception("Do not conserve Energy %s, %s" % (E/absE, E)) + if abs(px/abspx) > threshold: + logger.critical(self) + raise Exception("Do not conserve Px %s, %s" % (px/abspx, px)) + if abs(py/abspy) > threshold: + logger.critical(self) + raise Exception("Do not conserve Py %s, %s" % (py/abspy, py)) + if abs(pz/abspz) > threshold: + logger.critical(self) + raise Exception("Do not conserve Pz %s, %s" % (pz/abspz, pz)) + def assign_scale_line(self, line, convert=True): """read the line corresponding to global event line @@ -2764,7 +3075,7 @@ def zboost(self, pboost=None, E=0, pz=0): if isinstance(pboost, FourMomentum): E = pboost.E pz = pboost.pz - + #beta = pz/E gamma = E / math.sqrt(E**2-pz**2) gammabeta = pz / math.sqrt(E**2-pz**2) @@ -2778,6 +3089,74 @@ def zboost(self, pboost=None, E=0, pz=0): out.pz = 0 return out + def zboost_inv(self, pboost=None, E=0, pz=0): + """Inverse of zboost: both momenta should be in the same frame. + The boost performed takes a momentum from the rest frame of pboost + back to the current frame (only z boost applied). + """ + if isinstance(pboost, FourMomentum): + E = pboost.E + pz = pboost.pz + + #beta = pz/E + gamma = E / math.sqrt(E**2-pz**2) + gammabeta = pz / math.sqrt(E**2-pz**2) + + out = FourMomentum([gamma*self.E + gammabeta*self.pz, + self.px, + self.py, + gamma*self.pz + gammabeta*self.E]) + + if abs(out.pz) < 1e-6 * out.E: + out.pz = 0 + return out + + + def pt_boost(self, pboost=None, E=0, pz=0): + """Both momenta should be in the same frame. + The boost performed corresponds to the boost required to set pboost at + rest (only pT boost applied). 
+ """ + + if isinstance(pboost, FourMomentum): + E = pboost.E + px = pboost.px + py = pboost.py + mass = math.sqrt(E**2 - px**2 - py**2) + + betax = px/E + betay = py/E + beta = math.sqrt(betax**2+betay**2) + gamma = 1 / math.sqrt(1.0-beta**2) + + out = FourMomentum([gamma*self.E - gamma*betax*self.px - gamma*betay*self.py, + -gamma*betax*self.E + (1.0 + (gamma-1.0)*betax**2/(beta**2))*self.px + (gamma-1.0)*betax*betay/(beta**2)*self.py, + -gamma*betay*self.E + ((gamma-1.0)*betax*betay/(beta**2))*self.px + (1.0+(gamma-1.0)*(betay**2)/(beta**2))*self.py, + self.pz]) + + if abs(out.px) < 1e-6 * out.E: + out.px = 0 + if abs(out.py) < 1e-6 * out.E: + out.py = 0 + return out + + def boost_beta(self,beta,mom): + """ Boost along the three-momentum of mom with a boost of size beta""" + + unit = mom * (1.0/math.sqrt(mom.px**2+mom.py**2+mom.pz**2)) + beta_vec = beta*unit + bx = beta_vec.px + by = beta_vec.py + bz = beta_vec.pz + gamma = 1.0 / math.sqrt(1.0-beta**2) + + out = FourMomentum([gamma*self.E - gamma*bx*self.px - gamma*by*self.py - gamma*bz*self.pz, + -gamma*bx*self.E + (1.0 + (gamma-1.0)*bx**2/(beta**2))*self.px + (gamma-1.0)*bx*by/(beta**2)*self.py + (gamma-1.0)*bx*bz/(beta**2)*self.pz, + -gamma*by*self.E + ((gamma-1.0)*bx*by/(beta**2))*self.px + (1.0+(gamma-1.0)*(by**2)/(beta**2))*self.py + (gamma-1.0)*by*bz/(beta**2)*self.pz, + -gamma*bz*self.E + (gamma-1.0)*bx*bz/(beta**2)*self.px + (gamma-1.0)*(by*bz)/(beta**2)*self.py + (1.0+(gamma-1.0)*bz**2/(beta**2))*self.pz]) + + return out + def boost_to_restframe(self, pboost): """apply the boost transformation such that pboost is at rest in the new frame. First apply a rotation to allign the pboost to the z axis and then use @@ -2789,27 +3168,64 @@ def boost_to_restframe(self, pboost): return out - # write pboost as (E, p cosT sinF, p sinT sinF, p cosF) - # rotation such that it become (E, 0 , 0 , p ) is - # cosT sinF , -sinT , cosT sinF - # sinT cosF , cosT , sinT sinF - # -sinT , 0 , cosF - p = math.sqrt( pboost.px**2 + pboost.py**2+ pboost.pz**2) - cosF = pboost.pz / p - sinF = math.sqrt(1-cosF**2) - sinT = pboost.py/p/sinF - cosT = pboost.px/p/sinF - - out=FourMomentum([self.E, - self.px*cosT*cosF + self.py*sinT*cosF-self.pz*sinF, - -self.px*sinT+ self.py*cosT, - self.px*cosT*sinF + self.py*sinT*sinF + self.pz*cosF - ]) - out = out.zboost(E=pboost.E,pz=p) + # see here https://physics.stackexchange.com/questions/749036/general-lorentz-boost-of-four-momentum-in-cm-frame-particle-physics + vx = pboost.px/pboost.E + vy = pboost.py/pboost.E + vz = pboost.pz/pboost.E + v = pboost.norm/pboost.E + v2 = pboost.norm_sq/pboost.E**2 + gamma = 1./math.sqrt(1.-v**2) + gammo = gamma-1. 
+ out = FourMomentum(E = gamma*(self.E - vx*self.px - vy*self.py - vz*self.pz), + px= -gamma*vx*self.E + (1+gammo*vx**2/v2)*self.px + gammo*vx*vy/v2*self.py + gammo*vx*vz/v2*self.pz, + py= -gamma*vy*self.E + gammo*vy*vx/v2*self.px + (1+gammo*vy**2/v2)*self.py + gammo*vy*vz/v2*self.pz, + pz= -gamma*vz*self.E + gammo*vz*vx/v2*self.px + gammo*vz*vy/v2*self.py + (1+gammo*vz**2/v2)*self.pz) + return out + def rotate_to_z(self,prot): + + import math + import numpy as np + + z = np.array([0.,0.,1.]) + + px = self.px + py = self.py + pz = self.pz + + refx = prot.px + refy = prot.py + refz = prot.pz + + prot_mom = np.array([px, py, pz]) + ref_mom = np.array([refx, refy, refz]) + + # Create normal vector + n = np.array([refy, -refx, 0.]) + n = n * 1./math.sqrt(self.threedot(n,n)) + t = prot_mom - self.threedot(n,prot_mom)*n + p = ref_mom - self.threedot(ref_mom,z)*z + p = p/math.sqrt(self.threedot(p,p)) + + t_pz = np.array([self.threedot(t,p), self.threedot(t,z), 0.]) + costheta = self.threedot(ref_mom,z)* 1./math.sqrt(self.threedot(ref_mom, ref_mom)) + sintheta=math.sqrt(1.-costheta**2) + + sgn = 1. + t_pz_p = np.array([0., 0., 0.]) + t_pz_p[0] = costheta*t_pz[0] + sgn*(-sintheta) * t_pz[1] + t_pz_p[1] = sgn*sintheta*t_pz[0] + costheta * t_pz[1] + + out_mom = self.threedot(n,prot_mom)*n + t_pz_p[0]*p + t_pz_p[1]*z + + out = FourMomentum([self.E,out_mom[0], out_mom[1], out_mom[2] ] ) + + return out - + def threedot(self,a,b): + + return a[0]*b[0]+a[1]*b[1]+a[2]*b[2] class OneNLOWeight(object): diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/madevent_interface.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/madevent_interface.py index 2a118e21bf..8e30cf690c 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/madevent_interface.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/madevent_interface.py @@ -496,7 +496,6 @@ def help_remove(self): logger.info(" the optional '-f' allows to by-pass all security question") logger.info(" The banner can be remove only if all files are removed first.") - class AskRun(cmd.ControlSwitch): """a class for the question on what to do on a madevent run""" @@ -2393,13 +2392,17 @@ def do_generate_events(self, line): # Check argument's validity mode = self.check_generate_events(args) switch_mode = self.ask_run_configuration(mode, args) - if not args: - # No run name assigned -> assigned one automaticaly - self.set_run_name(self.find_available_run_name(self.me_dir), None, 'parton') - else: - self.set_run_name(args[0], None, 'parton', True) - args.pop(0) - + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + run_card = banner_mod.RunCard(pjoin(self.me_dir, 'Cards', 'run_card.dat'), consistency=False) + if not run_card.scan_set: + if not args: + # No run name assigned -> assigned one automaticaly + self.set_run_name(self.find_available_run_name(self.me_dir), None, 'parton') + else: + self.set_run_name(args[0], None, 'parton', True) + args.pop(0) + + self.run_generate_events(switch_mode, args) self.postprocessing() @@ -2560,7 +2563,7 @@ def wait_monitoring(Idle, Running, Done): self.update_status("postprocessing contur done", level="rivet") # this decorator handle the loop related to scan. 
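(Note on the lhe_parser.py additions above.) The new FourMomentum helpers close with boost_to_restframe, where the old rotate-then-zboost implementation is replaced by the closed-form general Lorentz boost referenced in the comment: velocity v = p/E of pboost, gamma = 1/sqrt(1-v^2), and matrix terms (gamma-1)*v_i*v_j/v^2. A minimal standalone sketch of the same formula, usable as a sanity check outside MadGraph (plain Python; the function name and test momentum are illustrative, this is not the FourMomentum API):

import math

def boost_to_restframe(p, pboost):
    # general Lorentz boost that brings pboost to rest; p and pboost are
    # (E, px, py, pz) tuples given in the same frame
    E, px, py, pz = p
    vx, vy, vz = pboost[1]/pboost[0], pboost[2]/pboost[0], pboost[3]/pboost[0]
    v2 = vx*vx + vy*vy + vz*vz
    gamma = 1.0 / math.sqrt(1.0 - v2)
    gammo = gamma - 1.0
    return (gamma*(E - vx*px - vy*py - vz*pz),
            -gamma*vx*E + (1 + gammo*vx*vx/v2)*px + gammo*vx*vy/v2*py + gammo*vx*vz/v2*pz,
            -gamma*vy*E + gammo*vy*vx/v2*px + (1 + gammo*vy*vy/v2)*py + gammo*vy*vz/v2*pz,
            -gamma*vz*E + gammo*vz*vx/v2*px + gammo*vz*vy/v2*py + (1 + gammo*vz*vz/v2)*pz)

# sanity check: boosting pboost by itself must give (m, 0, 0, 0)
pboost = (5.0, 1.0, 2.0, 3.0)
m = math.sqrt(pboost[0]**2 - pboost[1]**2 - pboost[2]**2 - pboost[3]**2)
E, px, py, pz = boost_to_restframe(pboost, pboost)
assert abs(E - m) < 1e-12 and max(abs(px), abs(py), abs(pz)) < 1e-12

Like the method above, the formula is singular for v2 = 0 (pboost already at rest), so callers are expected to skip the boost in that case.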
- @common_run.scanparamcardhandling() + @common_run.scanparamcardhandling(run_card_scan=True) def run_generate_events(self, switch_mode, args): if self.proc_characteristics['loop_induced'] and self.options['run_mode']==0: @@ -2593,7 +2596,6 @@ def run_generate_events(self, switch_mode, args): # Regular run mode logger.info('Generating %s events with run name %s' % (self.run_card['nevents'], self.run_name)) - self.exec_cmd('survey %s %s' % (self.run_name,' '.join(args)), postcmd=False) nb_event = self.run_card['nevents'] @@ -2975,7 +2977,7 @@ def update_width_in_param_card(decay_info, initial=None, output=None): particle = 0 # Read BRs for this decay line = param_card[line_number] - while re.search('^(#|\s|\d)', line): + while re.search(r'^(#|\s|\d)', line): line = param_card.pop(line_number) if not particle or line.startswith('#'): line=param_card[line_number] @@ -3226,7 +3228,7 @@ def do_treatcards(self, line, mode=None, opt=None): for line in open(pjoin(bias_module_path,'%s.f'%os.path.basename(bias_module_path))): if start and last: break - if not start and not re.search('c\s*parameters\s*=\s*{',line, re.I): + if not start and not re.search(r'c\s*parameters\s*=\s*{',line, re.I): continue start = True if not line.startswith('C'): @@ -3235,7 +3237,7 @@ def do_treatcards(self, line, mode=None, opt=None): if '{' in line: line = line.split('{')[-1] # split for } ! # - split_result = re.split('(\}|!|\#)', line,1, re.M) + split_result = re.split(r'(\}|!|\#)', line,1, re.M) line = split_result[0] sep = split_result[1] if len(split_result)>1 else None if sep == '}': @@ -3514,8 +3516,8 @@ def pass_in_difficult_integration_mode(self, rate=1): text = open(conf_path).read() min_evt, max_evt = 2500 *(2+rate), 10000*(rate+1) - text = re.sub('''\(min_events = \d+\)''', '(min_events = %i )' % min_evt, text) - text = re.sub('''\(max_events = \d+\)''', '(max_events = %i )' % max_evt, text) + text = re.sub(r'''\(min_events = \d+\)''', '(min_events = %i )' % min_evt, text) + text = re.sub(r'''\(max_events = \d+\)''', '(max_events = %i )' % max_evt, text) fsock = open(conf_path, 'w') fsock.write(text) fsock.close() @@ -3619,7 +3621,7 @@ def do_refine(self, line): alljobs = misc.glob('ajob*', Pdir) #remove associated results.dat (ensure to not mix with all data) - Gre = re.compile("\s*j=(G[\d\.\w]+)") + Gre = re.compile(r"\s*j=(G[\d\.\w]+)") for job in alljobs: Gdirs = Gre.findall(open(job).read()) for Gdir in Gdirs: @@ -3727,58 +3729,126 @@ def do_combine_events(self, line): sum_xsec, sum_xerru, sum_axsec = 0,[],0 Gdirs = self.get_Gdir() Gdirs.sort() - for Gdir in Gdirs: - if os.path.exists(pjoin(Gdir, 'events.lhe')): - result = sum_html.OneResult('') - result.read_results(pjoin(Gdir, 'results.dat')) - sum_xsec += result.get('xsec') - sum_xerru.append(result.get('xerru')) - sum_axsec += result.get('axsec') - - if self.run_card['gridpack'] or self.run_card['nevents']==0: - os.remove(pjoin(Gdir, 'events.lhe')) - continue + partials_info = [] + try: + p = subprocess.Popen(["ulimit", "-n"], stdout=subprocess.PIPE) + out, err = p.communicate() + max_G = out.decode() + if max_G == "unlimited": + max_G =2500 + else: + max_G = int(max_G) - 40 + except Exception as error: + logger.debug(error) + max_G = 80 # max(20, len(Gdirs)/self.options['nb_core']) - AllEvent.add(pjoin(Gdir, 'events.lhe'), - result.get('xsec'), - result.get('xerru'), - result.get('axsec') - ) - - if len(AllEvent) >= 80: #perform a partial unweighting - AllEvent.unweight(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % partials), - 
get_wgt, log_level=5, trunc_error=1e-2, event_target=self.run_card['nevents']) - AllEvent = lhe_parser.MultiEventFile() - AllEvent.banner = self.banner - AllEvent.add(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % partials), - sum_xsec, - math.sqrt(sum(x**2 for x in sum_xerru)), - sum_axsec) - partials +=1 - if not hasattr(self,'proc_characteristic'): self.proc_characteristic = self.get_characteristics() - if len(AllEvent) == 0: - nb_event = 0 - else: + mycluster = cluster.MultiCore(nb_core=self.options['nb_core']) + + def split(a, n): + """split the list "a" into n chunks of the same (or nearly the same) size""" + k, m = divmod(len(a), n) + return (a[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n)) + + partials_info = [] + if len(Gdirs) >= max_G: + start_unweight= time.perf_counter() + # first check how many chunks we have to split into (always use a multiple of nb_core) + nb_split = 1 + nb_G = len(Gdirs) // (2* self.options['nb_core']) + while nb_G > min(80, max_G): + nb_split += 1 + nb_G = len(Gdirs)//(nb_split*2*self.options['nb_core']) + if nb_G < 10: + nb_split -=1 + nb_G = len(Gdirs)//(nb_split*2*self.options['nb_core']) + + # enforce at least 10 directories per thread + if nb_G > 10 or nb_split>1: + # do the unweighting of each chunk on its own thread + nb_chunk = (nb_split*2*self.options['nb_core']) + else: + nb_chunk = len(Gdirs) // 10 + nb_G =10 + + # safety check in case the number of chunks is too large + if nb_chunk >= max_G: + nb_chunk = max_G -1 + nb_G = len(Gdirs) // nb_chunk + + for i, local_G in enumerate(split(Gdirs, nb_chunk)): + line = [pjoin(self.me_dir, "Events", self.run_name, "partials%d.lhe.gz" % i)] + line.append(pjoin(self.me_dir, 'Events', self.run_name, '%s_%s_banner.txt' % (self.run_name, tag))) + line.append(str(self.results.current['cross'])) + line += local_G + partials_info.append(self.do_combine_events_partial(' '.join(line), preprocess_only=True)) + mycluster.submit(sys.executable, + [pjoin(self.me_dir, 'bin', 'internal', 'madevent_interface.py'), 'combine_events_partial'] + line, + stdout='/dev/null' + ) + + starttime = time.time() + update_status = lambda idle, run, finish: \ + self.update_status((idle, run, finish, 'unweight'), level=None, + force=False, starttime=starttime) + mycluster.wait(self.me_dir, update_status) + # do the final combination + for data in partials_info: + AllEvent.add(*data) + + start_unweight= time.perf_counter() nb_event = AllEvent.unweight(pjoin(self.me_dir, "Events", self.run_name, 
"unweighted_events.lhe.gz"), + get_wgt, trunc_error=1e-2, event_target=self.run_card['nevents'], + log_level=logging.DEBUG, normalization=self.run_card['event_norm'], + proc_charac=self.proc_characteristic) + + if nb_event < self.run_card['nevents']: + logger.warning("failed to generate enough events. Please follow one of the following suggestions to fix the issue:") + logger.warning(" - set in the run_card.dat 'sde_strategy' to %s", 1 + self.run_card['sde_strategy'] % 2) + logger.warning(" - set in the run_card.dat 'hard_survey' to 1 or 2.") + logger.warning(" - reduce the number of requested events (if set too high)") + logger.warning(" - check that you do not have -integrable- singularity in your amplitude.") + logger.warning(" - regenerate your process directory by selecting another gauge (in particular try FD gauge).") - if nb_event < self.run_card['nevents']: - logger.warning("failed to generate enough events. Please follow one of the following suggestions to fix the issue:") - logger.warning(" - set in the run_card.dat 'sde_strategy' to %s", 1 + self.run_card['sde_strategy'] % 2) - logger.warning(" - set in the run_card.dat 'hard_survey' to 1 or 2.") - logger.warning(" - reduce the number of requested events (if set too high)") - logger.warning(" - check that you do not have -integrable- singularity in your amplitude.") - if partials: - for i in range(partials): - try: - os.remove(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % i)) - except Exception: - os.remove(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe" % i)) self.results.add_detail('nb_event', nb_event) @@ -3789,7 +3859,50 @@ def do_combine_events(self, line): logger.info("combination of events done in %s s ", time.time()-start) self.to_store.append('event') + + ############################################################################ + def do_combine_events_partial(self, line, preprocess_only=False): + """ """ + + AllEvent = lhe_parser.MultiEventFile() + + sum_xsec, sum_xerru, sum_axsec = 0,[],0 + output, banner_path,cross = line.split()[:3] + Gdirs = line.split()[3:] + + cross = float(cross) + if not self.banner: + self.banner = banner_mod.Banner(banner_path) + if not hasattr(self, 'run_card'): + self.run_card = banner_mod.RunCard(self.banner['mgruncard']) + AllEvent.banner = self.banner + + for Gdir in Gdirs: + if os.path.exists(pjoin(Gdir, 'events.lhe')): + result = sum_html.OneResult('') + result.read_results(pjoin(Gdir, 'results.dat')) + sum_xsec += result.get('xsec') + sum_xerru.append(result.get('xerru')) + sum_axsec += result.get('axsec') + + if self.run_card['gridpack'] or self.run_card['nevents']==0: + os.remove(pjoin(Gdir, 'events.lhe')) + continue + if not preprocess_only: + AllEvent.add(pjoin(Gdir, 'events.lhe'), + result.get('xsec'), + result.get('xerru'), + result.get('axsec') + ) + if preprocess_only: + return output, sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), sum_axsec + nb_event = max(min(abs(1.01*self.run_card['nevents']*sum_axsec/cross),self.run_card['nevents']), 10) + get_wgt = lambda event: event.wgt + AllEvent.unweight(output, + get_wgt, log_level=5, trunc_error=1e-2, event_target=nb_event) + return output, sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), sum_axsec + ############################################################################ def correct_bias(self): """check the first event and correct the weight by the bias @@ -3902,13 +4015,19 @@ def do_store_events(self, line): #except Exception: # continue # Store log - try: - if 
os.path.exists(pjoin(G_path, 'log.txt')): - input = pjoin(G_path, 'log.txt') + input = pjoin(G_path, 'log.txt') + if os.path.exists(input): + if self.run_card['keep_log'] not in ["none", "minimal"]: output = pjoin(G_path, '%s_log.txt' % run) - files.mv(input, output) - except Exception: - continue + try: + files.mv(input, output) + except Exception: + continue + elif self.run_card['keep_log'] == "none": + try: + os.remove(input) + except Exception: + continue #try: # # Grid # for name in ['ftn26']: @@ -3989,7 +4108,7 @@ def do_create_gridpack(self, line): misc.call(['./bin/internal/make_gridpack'], cwd=self.me_dir) files.mv(pjoin(self.me_dir, 'gridpack.tar.gz'), pjoin(self.me_dir, '%s_gridpack.tar.gz' % self.run_name)) - os.system("sed -i.bak \"s/\s*.true.*=.*GridRun/ .false. = GridRun/g\" %s/Cards/grid_card.dat" \ + os.system("sed -i.bak \"s/\\s*.true.*=.*GridRun/ .false. = GridRun/g\" %s/Cards/grid_card.dat" \ % self.me_dir) self.update_status('gridpack created', level='gridpack') @@ -4476,7 +4595,7 @@ def do_pythia8(self, line): else: preamble = misc.get_HEPTools_location_setter( pjoin(MG5DIR,'HEPTools'),'lib') - preamble += "\n unset PYTHIA8DATA\n" + #preamble += "\n unset PYTHIA8DATA\n" open(pythia_cmd_card,'w').write("""! ! It is possible to run this card manually with: @@ -4691,7 +4810,7 @@ def do_pythia8(self, line): # Make sure to sure the number of split_events determined during the splitting. split_PY8_Card.systemSet('Main:numberOfEvents',partition_for_PY8[i]) split_PY8_Card.systemSet('HEPMCoutput:scaling',split_PY8_Card['HEPMCoutput:scaling']* - (float(partition_for_PY8[i])/float(n_events))) + (float(partition_for_PY8[i]))) # Add_missing set to False so as to be sure not to add any additional parameter w.r.t # the ones in the original PY8 param_card copied. split_PY8_Card.write(pjoin(parallelization_dir,'PY8Card_%d.dat'%i), @@ -4963,9 +5082,9 @@ def wait_monitoring(Idle, Running, Done): if cross_sections: # Filter the cross_sections specified an keep only the ones # with central parameters and a different merging scale - a_float_re = '[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' + a_float_re = r'[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' central_merging_re = re.compile( - '^\s*Weight_MERGING\s*=\s*(?P%s)\s*$'%a_float_re, + r'^\s*Weight_MERGING\s*=\s*(?P%s)\s*$'%a_float_re, re.IGNORECASE) cross_sections = dict( (float(central_merging_re.match(xsec).group('merging')),value) @@ -5016,8 +5135,8 @@ def wait_monitoring(Idle, Running, Done): def parse_PY8_log_file(self, log_file_path): """ Parse a log file to extract number of event and cross-section. 
""" - pythiare = re.compile("Les Houches User Process\(es\)\s*\d+\s*\|\s*(?P\d+)\s*(?P\d+)\s*(?P\d+)\s*\|\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") - pythia_xsec_re = re.compile("Inclusive cross section\s*:\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") + pythiare = re.compile(r"Les Houches User Process\(es\)\s*\d+\s*\|\s*(?P\d+)\s*(?P\d+)\s*(?P\d+)\s*\|\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") + pythia_xsec_re = re.compile(r"Inclusive cross section\s*:\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") sigma_m, Nacc, Ntry = None, None, None for line in misc.BackRead(log_file_path): info = pythiare.search(line) @@ -5158,7 +5277,7 @@ def do_pythia(self, line): # read the line from the bottom of the file #pythia_log = misc.BackRead(pjoin(self.me_dir,'Events', self.run_name, # '%s_pythia.log' % tag)) - pythiare = re.compile("\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") + pythiare = re.compile(r"\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") for line in misc.reverse_readline(pjoin(self.me_dir,'Events', self.run_name, '%s_pythia.log' % tag)): info = pythiare.search(line) @@ -5619,8 +5738,8 @@ def launch_job(self,exe, cwd=None, stdout=None, argument = [], remaining=0, input_files.append(self.get_pdf_input_filename()) #Find the correct ajob - Gre = re.compile("\s*j=(G[\d\.\w]+)") - origre = re.compile("grid_directory=(G[\d\.\w]+)") + Gre = re.compile(r"\s*j=(G[\d\.\w]+)") + origre = re.compile(r"grid_directory=(G[\d\.\w]+)") try : fsock = open(exe) except Exception: @@ -5628,7 +5747,7 @@ def launch_job(self,exe, cwd=None, stdout=None, argument = [], remaining=0, text = fsock.read() output_files = Gre.findall(text) if not output_files: - Ire = re.compile("for i in ([\d\.\s]*) ; do") + Ire = re.compile(r"for i in ([\d\.\s]*) ; do") data = Ire.findall(text) data = ' '.join(data).split() for nb in data: @@ -6035,7 +6154,7 @@ def get_last_tag(self, level): 'syscalc':[], 'rivet':['rivet']} - if name == self.run_name: + if name and name == self.run_name: if reload_card: run_card = pjoin(self.me_dir, 'Cards','run_card.dat') self.run_card = banner_mod.RunCard(run_card) @@ -6334,7 +6453,7 @@ def run_syscalc(self, mode='parton', event_path=None, output=None): elif mode == 'Pythia': stdout = open(pjoin(event_dir, self.run_name, '%s_%s_syscalc.log' % (tag,mode)),'w') if 'mgpythiacard' in self.banner: - pat = re.compile('''^\s*qcut\s*=\s*([\+\-\d.e]*)''', re.M+re.I) + pat = re.compile(r'''^\s*qcut\s*=\s*([\+\-\d.e]*)''', re.M+re.I) data = pat.search(self.banner['mgpythiacard']) if data: qcut = float(data.group(1)) @@ -6611,7 +6730,7 @@ def get_subP_ids(path): for line in open(pjoin(path, 'leshouche.inc')): if not 'IDUP' in line: continue - particles = re.search("/([\d,-]+)/", line) + particles = re.search(r"/([\d,-]+)/", line) all_ids.append([int(p) for p in particles.group(1).split(',')]) return all_ids @@ -6899,6 +7018,7 @@ def do_combine_events(self, line): partials = 0 # if too many file make some partial unweighting sum_xsec, sum_xerru, sum_axsec = 0,[],0 + partials_info = [] Gdirs = self.get_Gdir() Gdirs.sort() for Gdir in Gdirs: @@ -6917,16 +7037,21 @@ def do_combine_events(self, line): sum_axsec += result.get('axsec')*gscalefact[Gdir] if len(AllEvent) >= 80: #perform a partial unweighting + nb_event = min(abs(1.01*self.nb_event*sum_axsec/self.results.current['cross']),self.run_card['nevents']) AllEvent.unweight(pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), - get_wgt, log_level=5, trunc_error=1e-2, event_target=self.nb_event) + 
get_wgt, log_level=5, trunc_error=1e-2, event_target=nb_event) AllEvent = lhe_parser.MultiEventFile() AllEvent.banner = self.banner - AllEvent.add(pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), + partials_info.append((pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), - sum_axsec) + sum_axsec) ) + sum_xsec, sum_xerru, sum_axsec = 0,[],0 partials +=1 + for data in partials_info: + AllEvent.add(*data) + if not hasattr(self,'proc_characteristic'): self.proc_characteristic = self.get_characteristics() diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/misc.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/misc.py index c4c669f36b..e7fd60be0d 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/misc.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/misc.py @@ -67,7 +67,7 @@ def parse_info_str(fsock): """ info_dict = {} - pattern = re.compile("(?P\w*)\s*=\s*(?P.*)", + pattern = re.compile(r"(?P\w*)\s*=\s*(?P.*)", re.IGNORECASE | re.VERBOSE) for entry in fsock: entry = entry.strip() @@ -84,7 +84,7 @@ def parse_info_str(fsock): def glob(name, path=''): """call to glob.glob with automatic security on path""" import glob as glob_module - path = re.sub('(?P\?|\*|\[|\])', '[\g]', path) + path = re.sub(r'(?P\?|\*|\[|\])', r'[\g]', path) return glob_module.glob(pjoin(path, name)) #=============================================================================== @@ -614,10 +614,10 @@ def mod_compilator(directory, new='gfortran', current=None, compiler_type='gfort #search file file_to_change=find_makefile_in_dir(directory) if compiler_type == 'gfortran': - comp_re = re.compile('^(\s*)FC\s*=\s*(.+)\s*$') + comp_re = re.compile(r'^(\s*)FC\s*=\s*(.+)\s*$') var = 'FC' elif compiler_type == 'cpp': - comp_re = re.compile('^(\s*)CXX\s*=\s*(.+)\s*$') + comp_re = re.compile(r'^(\s*)CXX\s*=\s*(.+)\s*$') var = 'CXX' else: MadGraph5Error, 'Unknown compiler type: %s' % compiler_type @@ -861,9 +861,9 @@ def detect_current_compiler(path, compiler_type='fortran'): # comp = re.compile("^\s*FC\s*=\s*(\w+)\s*") # The regular expression below allows for compiler definition with absolute path if compiler_type == 'fortran': - comp = re.compile("^\s*FC\s*=\s*([\w\/\\.\-]+)\s*") + comp = re.compile("^\\s*FC\\s*=\\s*([\\w\\/\\.\\-]+)\\s*") elif compiler_type == 'cpp': - comp = re.compile("^\s*CXX\s*=\s*([\w\/\\.\-]+)\s*") + comp = re.compile("^\\s*CXX\\s*=\\s*([\\w\\/\\.\\-]+)\\s*") else: MadGraph5Error, 'Unknown compiler type: %s' % compiler_type @@ -1001,7 +1001,19 @@ def call_stdout(arg, *args, **opt): def copytree(src, dst, symlinks = False, ignore = None): if not os.path.exists(dst): os.makedirs(dst) - shutil.copystat(src, dst) + try: + shutil.copystat(src, dst) + except PermissionError: + if os.path.realpath(src).startswith('/cvmfs') and os.path.realpath(dst).startswith('/afs'): + # allowing missmatch from cvmfs to afs since sounds to not create issue --at least in general-- + logger.critical(f'Ignoring that we could not copy permissions from {src} to {dst}') + else: + logger.critical(f'Permission error detected from {src} to {dst}.\n'+\ + 'If you are using WSL with windows partition, please try using python3.12\n'+\ + 'or avoid moving your data from the WSL partition to the UNIX one') + # we do not have enough experience in WSL to allow it to get trough. + raise + lst = os.listdir(src) if ignore: excl = ignore(src, lst) @@ -1895,12 +1907,12 @@ class EasterEgg(object): May4_banner = "* _____ *\n" + \ "* ,-~\" \"~-. *\n" + \ "* * ,^ ___ ^. 
* *\n" + \ - "* * / .^ ^. \ * *\n" + \ + "* * / .^ ^. \\ * *\n" + \ "* * Y l o ! Y * *\n" + \ "* * l_ `.___.' _,[ * *\n" + \ "* * |^~\"--------------~\"\"^| * *\n" + \ "* * ! May the 4th ! * *\n" + \ - "* * \ / * *\n" + \ + "* * \\ / * *\n" + \ "* * ^. .^ * *\n" + \ "* * \"-.._____.,-\" * *\n" @@ -1909,13 +1921,13 @@ class EasterEgg(object): "* M::::::::::M M::::::::::M *\n" + \ "* M:::::::::::M M:::::::::::M (_)___ *\n" + \ "* M:::::::M::::M M::::M:::::::M | / __| *\n" + \ - "* M::::::M M::::M M::::M M::::::M | \__ \ *\n" + \ + "* M::::::M M::::M M::::M M::::::M | \\__ \\ *\n" + \ "* M::::::M M::::M::::M M::::::M |_|___/ *\n" + \ "* M::::::M M:::::::M M::::::M *\n" + \ "* M::::::M M:::::M M::::::M / _| ___ _ __ *\n" + \ - "* M::::::M MMMMM M::::::M | |_ / _ \| '__| *\n" + \ + "* M::::::M MMMMM M::::::M | |_ / _ \\| '__| *\n" + \ "* M::::::M M::::::M | _| (_) | | *\n" + \ - "* M::::::M M::::::M |_/\/\___/|_| *\n" + \ + "* M::::::M M::::::M |_/\\/\\___/|_| *\n" + \ "* M::::::M M::::::M *\n" + \ "* MMMMMMMM MMMMMMMM *\n" + \ "* *\n" + \ @@ -2233,39 +2245,51 @@ def import_python_lhapdf(lhapdfconfig): os.environ['LD_LIBRARY_PATH'] = lhapdf_libdir else: os.environ['LD_LIBRARY_PATH'] = '%s:%s' %(lhapdf_libdir,os.environ['LD_LIBRARY_PATH']) - try: candidates=[dirname for dirname in os.listdir(lhapdf_libdir) \ if os.path.isdir(os.path.join(lhapdf_libdir,dirname))] except OSError: candidates=[] + if os.path.isdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')): + candidates += [pjoin(os.pardir,'local', 'lib', dirname) for dirname in os.listdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')) + if os.path.isdir(os.path.join(lhapdf_libdir,os.pardir, 'local', 'lib', dirname))] for candidate in candidates: - if os.path.isdir(os.path.join(lhapdf_libdir,candidate,'site-packages')): - sys.path.insert(0,os.path.join(lhapdf_libdir,candidate,'site-packages')) - try: - import lhapdf - use_lhapdf=True - break - except ImportError: - sys.path.pop(0) - continue + for subdir in ['site-packages', 'dist-packages']: + if os.path.isdir(os.path.join(lhapdf_libdir,candidate, subdir)): + sys.path.insert(0,os.path.join(lhapdf_libdir,candidate, subdir)) + try: + import lhapdf + use_lhapdf=True + break + except ImportError as error: + sys.path.pop(0) + continue + else: + continue + break if not use_lhapdf: try: candidates=[dirname for dirname in os.listdir(lhapdf_libdir+'64') \ if os.path.isdir(os.path.join(lhapdf_libdir+'64',dirname))] except OSError: candidates=[] - + if os.path.isdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib64')): + candidates += [pjoin(os.pardir,'local', 'lib64', dirname) for dirname in os.listdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')) + if os.path.isdir(os.path.join(lhapdf_libdir,os.pardir, 'local', 'lib64', dirname))] for candidate in candidates: - if os.path.isdir(os.path.join(lhapdf_libdir+'64',candidate,'site-packages')): - sys.path.insert(0,os.path.join(lhapdf_libdir+'64',candidate,'site-packages')) - try: - import lhapdf - use_lhapdf=True - break - except ImportError: - sys.path.pop(0) - continue + for subdir in ['site-packages', 'dist-packages']: + if os.path.isdir(os.path.join(lhapdf_libdir+'64',candidate, subdir)): + sys.path.insert(0,os.path.join(lhapdf_libdir+'64',candidate, subdir)) + try: + import lhapdf + use_lhapdf=True + break + except ImportError as error: + sys.path.pop(0) + continue + else: + continue + break if not use_lhapdf: try: import lhapdf diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/shower_card.py 
b/epochX/cudacpp/pp_tt012j.mad/bin/internal/shower_card.py index e87d534177..16ed72b1a0 100755 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/shower_card.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/shower_card.py @@ -288,7 +288,7 @@ def write(self, output_file, template=None, python_template=False, self.text = open(template,'r').read() - key_re = re.compile('^(\s*)([\S^#]+)(\s*)=(\s*)([^#]*?)(\s*)(\#.*|$)') + key_re = re.compile(r'^(\s*)([\S^#]+)(\s*)=(\s*)([^#]*?)(\s*)(\#.*|$)') newlines = [] for line in self.text.split('\n'): key_match = key_re.findall(line) diff --git a/epochX/cudacpp/pp_tt012j.mad/bin/internal/systematics.py b/epochX/cudacpp/pp_tt012j.mad/bin/internal/systematics.py index 28eaed00e2..2acebd087c 100644 --- a/epochX/cudacpp/pp_tt012j.mad/bin/internal/systematics.py +++ b/epochX/cudacpp/pp_tt012j.mad/bin/internal/systematics.py @@ -582,7 +582,7 @@ def print_cross_sections(self, all_cross, nb_event, stdout): else: resume.write( '#PDF %s: %g +%2.3g%% -%2.3g%%\n' % (pdfset.name, pdferr.central,pdferr.errplus*100/all_cross[0], pdferr.errminus*100/all_cross[0])) - dyn_name = {1: '\sum ET', 2:'\sum\sqrt{m^2+pt^2}', 3:'0.5 \sum\sqrt{m^2+pt^2}',4:'\sqrt{\hat s}' } + dyn_name = {1: r'\sum ET', 2:r'\sum\sqrt{m^2+pt^2}', 3:r'0.5 \sum\sqrt{m^2+pt^2}',4:r'\sqrt{\hat s}' } for key, curr in dyns.items(): if key ==-1: continue @@ -789,7 +789,7 @@ def get_id(self): return int(self.start_wgt_id) if 'initrwgt' in self.banner: - pattern = re.compile(' set lhapdf /PATH/TO/lhapdf-config -None does not seem to correspond to a valid lhapdf-config executable. -Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). -Note that you can still compile and run aMC@NLO with the built-in PDFs - MG5_aMC> set lhapdf /PATH/TO/lhapdf-config - Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 set zerowidth_tchannel F set auto_convert_model T save options auto_convert_model -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt import model SMEFTsim_topU3l_MwScheme_UFO -massless_4t INFO: load particles INFO: load vertices @@ -77,7 +72,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.14006304740905762  +DEBUG: model prefixing takes 0.14330029487609863  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -92,24 +87,24 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.739 s +1 processes with 72 diagrams generated in 3.816 s Total: 1 processes with 72 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_smeft_gg_tttt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT -Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.5.3_lo_vect. +Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT +DEBUG: opt['output_options']['vector_size'] =  32 [export_v4.py at line 4334]  Output will be done with PLUGIN: CUDACPP_OUTPUT -DEBUG: cformat =  standalone_simd [export_cpp.py at line 3070]  DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 165]  INFO: initialize a new directory: CODEGEN_mad_smeft_gg_tttt INFO: remove old information in CODEGEN_mad_smeft_gg_tttt DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/SubProcesses  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ t t~ @1 @@ -119,32 +114,28 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
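(Aside on the do_combine_events rewrite in madevent_interface.py earlier in this diff.) When the number of G* channel directories exceeds what can safely be merged at once, the new code splits them into nearly equal chunks, unweights each chunk into its own partials%d.lhe.gz on a separate core, and only then combines the partial files; the cap is derived from the open-file-descriptor limit minus a margin of 40. A minimal sketch of the chunking logic, under stated simplifications: the resource module stands in for the `ulimit -n` subprocess call (ulimit is usually a shell builtin rather than an executable, so the subprocess variant may land in the except fallback), the nb_core-based refinement of the chunk count is omitted, and the directory names are hypothetical:

import resource  # POSIX only

def split(a, n):
    # split list "a" into n chunks whose lengths differ by at most one
    # (the same helper as in do_combine_events)
    k, m = divmod(len(a), n)
    return (a[i*k + min(i, m):(i+1)*k + min(i+1, m)] for i in range(n))

soft, _hard = resource.getrlimit(resource.RLIMIT_NOFILE)
max_G = 2500 if soft == resource.RLIM_INFINITY else soft - 40

Gdirs = ['G%d' % i for i in range(200)]              # hypothetical channel directories
nb_chunk = max(1, min(len(Gdirs) // 10, max_G - 1))  # roughly ten directories per chunk
chunks = list(split(Gdirs, nb_chunk))

assert sum(len(c) for c in chunks) == len(Gdirs)
assert max(len(c) for c in chunks) - min(len(c) for c in chunks) <= 1

Each chunk is handed to a combine_events_partial child process, and the parent then merges the returned (path, xsec, xerru, axsec) tuples exactly as the serial branch would.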
-DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 0, 67, 68, 0, 69, 70] [export_cpp.py at line 711]  -DEBUG: subproc_number =  0 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttxttx -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/SubProcesses/P1_gg_ttxttx [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  70 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 67: 68, 68: 69, 69: 71, 70: 72} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6, 7: 7, 8: 8, 9: 9, 10: 10, 11: 11, 12: 12, 13: 13, 14: 14, 15: 15, 16: 16, 17: 17, 18: 18, 19: 19, 20: 20, 21: 21, 22: 22, 23: 23, 24: 24, 25: 25, 26: 26, 27: 27, 28: 28, 29: 29, 30: 30, 31: 31, 32: 32, 33: 33, 34: 34, 35: 35, 36: 36, 37: 37, 38: 38, 39: 39, 40: 40, 41: 41, 42: 42, 43: 43, 44: 44, 45: 45, 46: 46, 47: 47, 48: 48, 49: 49, 50: 50, 51: 51, 52: 52, 53: 53, 54: 54, 55: 55, 56: 56, 57: 57, 58: 58, 59: 59, 60: 60, 61: 61, 62: 62, 63: 63, 64: 64, 65: 65, 66: 66, 68: 67, 69: 68, 71: 69, 72: 70} [model_handling.py at line 1548]  -Generated helas calls for 1 subprocesses (72 diagrams) in 0.190 s -Wrote files for 119 helas calls in 0.401 s +Generated helas calls for 1 subprocesses (72 diagrams) in 0.193 s +Wrote files for 119 helas calls in 0.396 s +DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.323 s +ALOHA: aloha creates 5 routines in 0.327 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 10 routines in 0.332 s +ALOHA: aloha creates 10 routines in 0.336 s VVV5 VVV5 FFV1 @@ -154,38 +145,40 @@ ALOHA: aloha creates 10 routines in 0.332 s VVVV1 VVVV9 VVVV10 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/./HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h -INFO: Created file HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/. 
+FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/./HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h +INFO: Created file HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common -patching file Source/genps.inc +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file SubProcesses/makefile -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/SubProcesses/P1_gg_ttxttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/SubProcesses/P1_gg_ttxttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -Hunk #2 succeeded at 281 (offset 48 lines). +Hunk #2 succeeded at 275 (offset 61 lines). DEBUG: p.returncode =  0 [output.py at line 258]  -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt done. 
+Output to directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/README +/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/README Run "open index.html" to see more information about this process. quit -real 0m7.325s -user 0m6.873s -sys 0m0.329s +real 0m7.349s +user 0m6.999s +sys 0m0.311s Code generation completed in 7 seconds ************************************************************ * * @@ -199,7 +192,7 @@ Code generation completed in 7 seconds * * * * * * * * * * * * -* VERSION 3.5.3_lo_vect * +* VERSION 3.6.0_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * @@ -207,9 +200,9 @@ Code generation completed in 7 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt @@ -229,7 +222,7 @@ launch in debug mode * * * * * * * * * * * * -* VERSION 3.5.3_lo_vect * +* VERSION 3.6.0_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * @@ -237,9 +230,9 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_smeft_gg_tttt/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/Cards/me5_configuration.txt b/epochX/cudacpp/smeft_gg_tttt.mad/Cards/me5_configuration.txt index cdeedc7863..4f5079f78a 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/smeft_gg_tttt.mad/Cards/me5_configuration.txt @@ -116,6 +116,7 @@ # cluster_type = condor # cluster_queue = madgraph # cluster_size = 150 +# cluster_walltime = # time in minute for slurm and second for condor (not supported for other scheduller) #! Path to a node directory to avoid direct writing on the central disk #! Note that condor clusters avoid direct writing by default (therefore this @@ -234,7 +235,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/Cards/param_card.dat b/epochX/cudacpp/smeft_gg_tttt.mad/Cards/param_card.dat index 4a29fbe719..effb6edf97 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/Cards/param_card.dat +++ b/epochX/cudacpp/smeft_gg_tttt.mad/Cards/param_card.dat @@ -1,5 +1,5 @@ ###################################################################### -## PARAM_CARD AUTOMATICALY GENERATED BY MG5 FOLLOWING UFO MODEL #### +## PARAM_CARD AUTOMATICALLY GENERATED BY MG5 FOLLOWING UFO MODEL #### ###################################################################### ## ## ## Width set on Auto will be computed following the information ## diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/Cards/param_card_default.dat b/epochX/cudacpp/smeft_gg_tttt.mad/Cards/param_card_default.dat index 4a29fbe719..effb6edf97 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/Cards/param_card_default.dat +++ b/epochX/cudacpp/smeft_gg_tttt.mad/Cards/param_card_default.dat @@ -1,5 +1,5 @@ ###################################################################### -## PARAM_CARD AUTOMATICALY GENERATED BY MG5 FOLLOWING UFO MODEL #### +## PARAM_CARD AUTOMATICALLY GENERATED BY MG5 FOLLOWING UFO MODEL #### ###################################################################### ## ## ## Width set on Auto will be computed following the information ## diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/smeft_gg_tttt.mad/Cards/proc_card_mg5.dat index f365b1da04..cff7a006a2 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/smeft_gg_tttt.mad/Cards/proc_card_mg5.dat @@ -8,7 +8,7 @@ #* * * * #* * #* * -#* VERSION 3.5.3_lo_vect 2023-12-23 * +#* VERSION 3.6.0_lo_vect 2024-06-17 * #* * #* WARNING: UNKNOWN DEVELOPMENT VERSION. * #* WARNING: DO NOT USE FOR PRODUCTION * diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/Cards/reweight_card_default.dat b/epochX/cudacpp/smeft_gg_tttt.mad/Cards/reweight_card_default.dat index ace534ae02..5cc65fe095 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/Cards/reweight_card_default.dat +++ b/epochX/cudacpp/smeft_gg_tttt.mad/Cards/reweight_card_default.dat @@ -19,8 +19,16 @@ change mode NLO # Define type of Reweighting. For LO sample this command # has no effect since only "LO" mode is allowed. 
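(Returning to the recoil_eq helper added in lhe_parser.py above.) It restores the initial-state kinematics by solving E1 + E2 = K and p1 + p2 = c for two particles whose masses and transverse momenta are held fixed (a = m1^2 + px1^2 + py1^2, and b likewise for the second particle), which reduces to a quadratic in p1. A small self-contained check of that closed form, with hypothetical numbers and the same sign convention as sol1:

import math

def recoil_pz(a, b, c, K):
    # closed-form root of sqrt(a + p**2) + sqrt(b + (c - p)**2) == K
    # (the quadratic solved by recoil_eq; this is the sol1 branch)
    K2 = K**2
    disc = K2 * (a**2 + (b + c**2 - K2)**2 - 2*a*(b - c**2 + K2))
    return (-a*c + b*c + c**3 - c*K2 - math.sqrt(disc)) / (2*(c**2 - K2))

# hypothetical two-particle configuration: fixed masses and pT,
# longitudinal momenta to be redistributed
m1, px1, py1, pz1 = 0.0005, 0.3, 0.2, 40.0
m2, px2, py2, pz2 = 0.105, -0.3, -0.2, -25.0
a = m1**2 + px1**2 + py1**2
b = m2**2 + px2**2 + py2**2
c = pz1 + pz2
K = math.sqrt(a + pz1**2) + math.sqrt(b + pz2**2)

p1 = recoil_pz(a, b, c, K)
assert abs(math.sqrt(a + p1**2) + math.sqrt(b + (c - p1)**2) - K) < 1e-8
assert abs(p1 - pz1) < 1e-8  # here the sol1 branch reproduces the input pz

Both quadratic roots solve the squared system, which is why recoil_eq cross-checks the two candidate solutions against K within a tolerance before returning sol1.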
+ +#uncomment if you do not want to overwrite the reweight file of Sudakov in rw_me +#change rwgt_dir /PATH_MG5_BRANCH/NAME_FOLDER + +#uncomment if you want to use Sudakov Reweight +#change include_sudakov True + launch + # SPECIFY A PATH OR USE THE SET COMMAND LIKE THIS: # set sminputs 1 130 # modify 1/alpha_EW diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/Cards/run_card.dat b/epochX/cudacpp/smeft_gg_tttt.mad/Cards/run_card.dat index b58554b4f2..6b82577032 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/Cards/run_card.dat +++ b/epochX/cudacpp/smeft_gg_tttt.mad/Cards/run_card.dat @@ -157,6 +157,7 @@ systematics = systematics_program ! none, systematics [python], SysCalc [depreceted, C++] ['--mur=0.5,1,2', '--muf=0.5,1,2', '--pdf=errorset'] = systematics_arguments ! see: https://cp3.irmp.ucl.ac.be/projects/madgraph/wiki/Systematics#Systematicspythonmodule + #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/Cards/run_card_default.dat b/epochX/cudacpp/smeft_gg_tttt.mad/Cards/run_card_default.dat index 1cb94c8622..b8db871c35 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/Cards/run_card_default.dat +++ b/epochX/cudacpp/smeft_gg_tttt.mad/Cards/run_card_default.dat @@ -157,6 +157,7 @@ systematics = systematics_program ! none, systematics [python], SysCalc [depreceted, C++] ['--mur=0.5,1,2', '--muf=0.5,1,2', '--pdf=errorset'] = systematics_arguments ! see: https://cp3.irmp.ucl.ac.be/projects/madgraph/wiki/Systematics#Systematicspythonmodule + #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/MGMEVersion.txt b/epochX/cudacpp/smeft_gg_tttt.mad/MGMEVersion.txt index 9d3a5c0ba0..a806d3938c 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/MGMEVersion.txt +++ b/epochX/cudacpp/smeft_gg_tttt.mad/MGMEVersion.txt @@ -1 +1 @@ -3.5.3_lo_vect \ No newline at end of file +3.6.0_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/Source/DHELAS/aloha_functions.f b/epochX/cudacpp/smeft_gg_tttt.mad/Source/DHELAS/aloha_functions.f index 975725737f..47699fa614 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/Source/DHELAS/aloha_functions.f +++ b/epochX/cudacpp/smeft_gg_tttt.mad/Source/DHELAS/aloha_functions.f @@ -351,7 +351,7 @@ subroutine oxxxxx(p,fmass,nhel,nsf , fo) fo(6) = ip * sqm(abs(im)) else - pp = min(p(0),dsqrt(p(1)**2+p(2)**2+p(3)**2)) +c pp = min(p(0),dsqrt(p(1)**2+p(2)**2+p(3)**2)) sf(1) = dble(1+nsf+(1-nsf)*nh)*rHalf sf(2) = dble(1+nsf-(1-nsf)*nh)*rHalf omega(1) = dsqrt(p(0)+pp) @@ -2054,7 +2054,7 @@ subroutine CombineAmp(nb, ihels, iwfcts, W1, Wall, Amp) enddo return end - + subroutine CombineAmpS(nb, ihels, iwfcts, W1, Wall, Amp) integer nb ! 
size of the vectors diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/Source/MODEL/couplings.f b/epochX/cudacpp/smeft_gg_tttt.mad/Source/MODEL/couplings.f index f3b620ab58..04d6bb5333 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/Source/MODEL/couplings.f +++ b/epochX/cudacpp/smeft_gg_tttt.mad/Source/MODEL/couplings.f @@ -10,18 +10,27 @@ SUBROUTINE COUP() PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + LOGICAL UPDATELOOP COMMON /TO_UPDATELOOP/UPDATELOOP INCLUDE 'input.inc' - INCLUDE '../vector.inc' + + INCLUDE 'coupl.inc' READLHA = .TRUE. INCLUDE 'intparam_definition.inc' CALL COUP1() + IF (UPDATELOOP) THEN + + CALL COUP2() + + ENDIF + C couplings needed to be evaluated points by points C - CALL COUP2(1) + CALL COUP3(1) RETURN END @@ -47,7 +56,11 @@ SUBROUTINE UPDATE_AS_PARAM(VECID) INCLUDE '../maxparticles.inc' INCLUDE '../cuts.inc' + + INCLUDE '../vector.inc' + + INCLUDE '../run.inc' DOUBLE PRECISION ALPHAS @@ -65,7 +78,7 @@ SUBROUTINE UPDATE_AS_PARAM(VECID) couplings needed to be evaluated points by points C ALL_G(VECID) = G - CALL COUP2(VECID) + CALL COUP3(VECID) RETURN END @@ -80,7 +93,9 @@ SUBROUTINE UPDATE_AS_PARAM2(MU_R2,AS2 ,VECID) INTEGER VECID INCLUDE 'model_functions.inc' INCLUDE 'input.inc' + INCLUDE '../vector.inc' + INCLUDE 'coupl.inc' DOUBLE PRECISION MODEL_SCALE COMMON /MODEL_SCALE/MODEL_SCALE diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/Source/MODEL/couplings1.f b/epochX/cudacpp/smeft_gg_tttt.mad/Source/MODEL/couplings1.f index e14f3a1770..72cfa0f6e4 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/Source/MODEL/couplings1.f +++ b/epochX/cudacpp/smeft_gg_tttt.mad/Source/MODEL/couplings1.f @@ -7,11 +7,12 @@ SUBROUTINE COUP1( ) IMPLICIT NONE INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' END diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/Source/MODEL/couplings2.f b/epochX/cudacpp/smeft_gg_tttt.mad/Source/MODEL/couplings2.f index 555e337578..30f3a04e3b 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/Source/MODEL/couplings2.f +++ b/epochX/cudacpp/smeft_gg_tttt.mad/Source/MODEL/couplings2.f @@ -2,19 +2,17 @@ c written by the UFO converter ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc - SUBROUTINE COUP2( VECID) + SUBROUTINE COUP2( ) IMPLICIT NONE - INTEGER VECID + INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' - GC_6(VECID) = -(MDL_COMPLEXI*G) - GC_7(VECID) = G - GC_8(VECID) = MDL_COMPLEXI*MDL_G__EXP__2 END diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/Source/MODEL/couplings3.f b/epochX/cudacpp/smeft_gg_tttt.mad/Source/MODEL/couplings3.f index 0d0f298340..3a8a352951 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/Source/MODEL/couplings3.f +++ b/epochX/cudacpp/smeft_gg_tttt.mad/Source/MODEL/couplings3.f @@ -7,12 +7,13 @@ SUBROUTINE COUP3( VECID) IMPLICIT NONE INTEGER VECID INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' GC_6(VECID) = -(MDL_COMPLEXI*G) GC_7(VECID) = G diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/Source/MODEL/makefile b/epochX/cudacpp/smeft_gg_tttt.mad/Source/MODEL/makefile index 0b24445a53..5a5681d220 100644 --- 
a/epochX/cudacpp/smeft_gg_tttt.mad/Source/MODEL/makefile +++ b/epochX/cudacpp/smeft_gg_tttt.mad/Source/MODEL/makefile @@ -3,7 +3,7 @@ # Makefile for model library # # ---------------------------------------------------------------------------- - +# template models/template_files/makefile_madevent # Check for ../make_opts ifeq ($(wildcard ../make_opts), ../make_opts) include ../make_opts diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/Source/MODEL/makeinc.inc b/epochX/cudacpp/smeft_gg_tttt.mad/Source/MODEL/makeinc.inc index 6e2743eac1..4a9e1b2cc5 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/Source/MODEL/makeinc.inc +++ b/epochX/cudacpp/smeft_gg_tttt.mad/Source/MODEL/makeinc.inc @@ -2,4 +2,4 @@ # written by the UFO converter ############################################################################# -MODEL = couplings.o lha_read.o printout.o rw_para.o model_functions.o couplings1.o couplings2.o \ No newline at end of file +MODEL = couplings.o lha_read.o printout.o rw_para.o model_functions.o couplings1.o couplings2.o couplings3.o \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/Source/MODEL/printout.f b/epochX/cudacpp/smeft_gg_tttt.mad/Source/MODEL/printout.f index 18b8f35b08..3e195b03a2 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/Source/MODEL/printout.f +++ b/epochX/cudacpp/smeft_gg_tttt.mad/Source/MODEL/printout.f @@ -1,3 +1,7 @@ +ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc +c written by the UFO converter +ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc + c************************************************************************ c** ** c** MadGraph/MadEvent Interface to FeynRules ** @@ -9,7 +13,7 @@ subroutine printout implicit none - include '../vector.inc' ! defines VECSIZE_MEMMAX + include '../vector.inc' ! VECSIZE_MEMMAX (needed by coupl.inc) include 'coupl.inc' ! needs VECSIZE_MEMMAX (defined in vector.inc) include 'input.inc' diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/Source/PDF/eepdf.inc b/epochX/cudacpp/smeft_gg_tttt.mad/Source/PDF/eepdf.inc index a0183e49ee..d50d8c62b3 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/Source/PDF/eepdf.inc +++ b/epochX/cudacpp/smeft_gg_tttt.mad/Source/PDF/eepdf.inc @@ -4,6 +4,9 @@ integer n_ee parameter (n_ee = 4) ! arrays to store the components before combining them + ! note this common is very quickly overwritten do not use + ! use such common outside of auto_dsig.f double precision ee_components(n_ee) common / to_ee_components / ee_components + diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/Source/PDF/pdfwrap_emela.f b/epochX/cudacpp/smeft_gg_tttt.mad/Source/PDF/pdfwrap_emela.f index bce10819d5..da1e4e2276 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/Source/PDF/pdfwrap_emela.f +++ b/epochX/cudacpp/smeft_gg_tttt.mad/Source/PDF/pdfwrap_emela.f @@ -13,7 +13,7 @@ SUBROUTINE PDFWRAP DOUBLE PRECISION VALUE(20) REAL*8 ALPHASPDF EXTERNAL ALPHASPDF - ! 
PDFs with beamstrahlung use specific initialisation/evaluation +C PDFs with beamstrahlung use specific initialisation/evaluation LOGICAL HAS_BSTRAHL COMMON /TO_HAS_BS/ HAS_BSTRAHL diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/Source/PDF/pdg2pdf.f b/epochX/cudacpp/smeft_gg_tttt.mad/Source/PDF/pdg2pdf.f index 46f321e66b..2e7343a69b 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/Source/PDF/pdg2pdf.f +++ b/epochX/cudacpp/smeft_gg_tttt.mad/Source/PDF/pdg2pdf.f @@ -108,7 +108,7 @@ double precision function pdg2pdf(ih,ipdg,beamid,x,xmu) else if (abs(ipart).eq.10) then ipart = sign(1,ipart) * 15 endif - pdg2pdf = 0d0 + pdg2pdf = 0d0 if (beamid.lt.0) then ih_local = ipart @@ -122,7 +122,7 @@ double precision function pdg2pdf(ih,ipdg,beamid,x,xmu) endif do i_ee = 1, n_ee ee_components(i_ee) = compute_eepdf(x,omx_ee(iabs(beamid)),xmu,i_ee,ipart,ih_local) - enddo + enddo pdg2pdf = ee_components(1) ! temporary to test pdf load c write(*,*), x, beamid ,omx_ee(iabs(beamid)),xmu,1,ipart,ih_local,pdg2pdf return diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/Source/dsample.f b/epochX/cudacpp/smeft_gg_tttt.mad/Source/dsample.f index 09d482b45a..b0bc0547ad 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/Source/dsample.f +++ b/epochX/cudacpp/smeft_gg_tttt.mad/Source/dsample.f @@ -204,6 +204,9 @@ subroutine sample_full(ndim,ncall,itmax,itmin,dsig,ninvar,nconfigs,VECSIZE_USED) if (VECSIZE_USED.le.1) then all_fx(1) = dsig(all_p, all_wgt,0) + ivec=0 + ilock=0 + iwarp=1 else c Here "i" is the position in the full grid of the event do i=(iwarp-1)*WARP_SIZE+1, iwarp*warp_size diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/Source/eepdf.inc b/epochX/cudacpp/smeft_gg_tttt.mad/Source/eepdf.inc index a0183e49ee..d50d8c62b3 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/Source/eepdf.inc +++ b/epochX/cudacpp/smeft_gg_tttt.mad/Source/eepdf.inc @@ -4,6 +4,9 @@ integer n_ee parameter (n_ee = 4) ! arrays to store the components before combining them + ! note this common is very quickly overwritten do not use + ! use such common outside of auto_dsig.f double precision ee_components(n_ee) common / to_ee_components / ee_components + diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/Source/genps.inc b/epochX/cudacpp/smeft_gg_tttt.mad/Source/genps.inc index af7e0efbce..ef78e7e812 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/Source/genps.inc +++ b/epochX/cudacpp/smeft_gg_tttt.mad/Source/genps.inc @@ -29,9 +29,8 @@ c parameter (max_host=99,maxplace=199,maxpoints=100,maxans=50) c************************************************************************* c Parameters for helicity sums in matrixN.f c************************************************************************* - REAL*8 LIMHEL -c PARAMETER(LIMHEL=1e-8) ! ME threshold for helicity filtering (Fortran default) - PARAMETER(LIMHEL=0) ! ME threshold for helicity filtering (force Fortran to mimic cudacpp, see #419) +c REAL*8 LIMHEL +c PARAMETER(LIMHEL=1e-8) -> pass in the run_card.dat INTEGER MAXTRIES PARAMETER(MAXTRIES=25) C To pass the helicity configuration chosen by the DiscreteSampler to diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/Source/run.inc b/epochX/cudacpp/smeft_gg_tttt.mad/Source/run.inc index 5433a23583..81c8df6bf2 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/Source/run.inc +++ b/epochX/cudacpp/smeft_gg_tttt.mad/Source/run.inc @@ -106,4 +106,7 @@ c double precision tmin_for_channel integer sde_strat ! 
1 means standard single diagram enhancement strategy, c 2 means approximation by the denominator of the propagator - common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat \ No newline at end of file + common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat +c + double precision limhel + common/to_limhel/limhel diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/Source/run_card.inc b/epochX/cudacpp/smeft_gg_tttt.mad/Source/run_card.inc index 67af0f2051..1a1bc782bd 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/Source/run_card.inc +++ b/epochX/cudacpp/smeft_gg_tttt.mad/Source/run_card.inc @@ -88,6 +88,8 @@ DSQRT_SHAT = 0.000000000000000D+00 + LIMHEL = 0.000000000000000D+00 + PTJ = 2.000000000000000D+01 PTB = 0.000000000000000D+00 diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/Source/setrun.f b/epochX/cudacpp/smeft_gg_tttt.mad/Source/setrun.f index 9e9ef7fdbd..dc6caef748 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/Source/setrun.f +++ b/epochX/cudacpp/smeft_gg_tttt.mad/Source/setrun.f @@ -162,9 +162,21 @@ subroutine setrun C Fill common block for Les Houches init info do i=1,2 if(lpp(i).eq.1.or.lpp(i).eq.2) then - idbmup(i)=2212 + if (nb_proton(i).eq.1.and.nb_neutron(i).eq.0) then + idbmup(i)=2212 + elseif (nb_proton(i).eq.0.and.nb_neutron(i).eq.1) then + idbmup(i)=2112 + else + idbmup(i) = 1000000000 + (nb_proton(i)+nb_neutron(i))*10 + $ + nb_proton(i)*10000 + endif elseif(lpp(i).eq.-1.or.lpp(i).eq.-2) then - idbmup(i)=-2212 + if (nb_proton(i).eq.1.and.nb_neutron(i).eq.0) then + idbmup(i)=-2212 + else + idbmup(i) = -1*(1000000000 + (nb_proton(i)+nb_neutron(i))*10 + $ + nb_proton(i)*10000) + endif elseif(lpp(i).eq.3) then idbmup(i)=11 elseif(lpp(i).eq.-3) then diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/MGVersion.txt b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/MGVersion.txt index 9d3a5c0ba0..a806d3938c 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/MGVersion.txt +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/MGVersion.txt @@ -1 +1 @@ -3.5.3_lo_vect \ No newline at end of file +3.6.0_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/CPPProcess.cc b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/CPPProcess.cc index bcac2c3f49..9f314a5390 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/CPPProcess.cc +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/CPPProcess.h b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/CPPProcess.h index 0b15804ba8..d6b7f76091 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/CPPProcess.h +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. 
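The setrun.f hunk above stops hard-coding the beam identity to (anti)protons: when nb_proton/nb_neutron describe an ion, the Les Houches idbmup is built from the standard PDG nuclear code 10LZZZAAAI. A minimal Python sketch of that arithmetic (the function and argument names are illustrative, not MG5aMC API); the same encoding reappears in the banner.py get_idbmup hunk further down:

```python
# Hedged sketch of the beam-id arithmetic added in setrun.f above.
# PDG nuclear codes are 10LZZZAAAI: 1000000000 + 10000*Z + 10*A.
import math

def beam_pdg_id(lpp, nb_proton=1, nb_neutron=0):
    """Illustrative: beam id for lpp = +-1/+-2 (proton/ion beams)."""
    if nb_proton == 1 and nb_neutron == 0:
        target = 2212                       # a single proton
    elif nb_proton == 0 and nb_neutron == 1:
        target = 2112                       # a single neutron
    else:
        nucleons = nb_proton + nb_neutron   # A = Z + N
        target = 1000000000 + 10 * nucleons + 10000 * nb_proton
    return int(math.copysign(target, lpp))  # sign follows lpp

assert beam_pdg_id(1) == 2212
assert beam_pdg_id(-1) == -2212
assert beam_pdg_id(1, nb_proton=82, nb_neutron=126) == 1000822080  # Pb-208
```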
//========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig.f b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig.f index ad9980333b..f327786853 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig.f +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1134,11 +1134,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1148,32 +1149,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=64) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig1.f b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig1.f index d4fae9f441..e0844348aa 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig1.f +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -101,6 +101,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -217,7 +219,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 
3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -281,7 +283,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -320,9 +322,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -338,6 +341,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -485,11 +490,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -589,9 +589,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -752,3 +754,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/driver.f b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/driver.f index 526cc3b0ae..f7f23196eb 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/driver.f +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
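Both driver.f hunks above pin the scales whenever init_mode is set: the zero-helicity pre-scan (the 'Determining zero helicities' pass that continues just below) should probe every helicity with the same, event-independent couplings, so fixed_ren_scale/fixed_fac_scale1/fixed_fac_scale2 are forced to .true. and ickkw to 0 for that pass only. A small Python sketch of the intent (all names invented, not MG5aMC API):

```python
# Illustrative only: freeze scale choices while the helicity filter runs,
# mirroring the init_mode branch added to driver.f above.
from dataclasses import dataclass, replace

@dataclass(frozen=True)
class ScaleSettings:
    fixed_ren_scale: bool = False
    fixed_fac_scale1: bool = False
    fixed_fac_scale2: bool = False
    ickkw: int = 1            # CKKW-style matching flag

def for_helicity_init(settings: ScaleSettings) -> ScaleSettings:
    # The filtering pass must not depend on event-by-event scales,
    # otherwise a helicity could pass or fail for the wrong reason.
    return replace(settings,
                   fixed_ren_scale=True,
                   fixed_fac_scale1=True,
                   fixed_fac_scale2=True,
                   ickkw=0)

print(for_helicity_init(ScaleSettings()))
```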
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/matrix1.f b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/matrix1.f index c1df290c34..631bf2245d 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/matrix1.f +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -23,6 +23,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=64) INTEGER NGRAPHS @@ -46,8 +48,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -56,26 +58,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -86,7 +85,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -96,26 +94,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,6) /-1,-1,-1, 1,-1, 1/ DATA (NHEL(I, 2),I=1,6) /-1,-1,-1, 1,-1,-1/ DATA (NHEL(I, 3),I=1,6) /-1,-1,-1, 1, 1, 1/ @@ -191,8 +188,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -201,11 +197,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=12 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=12 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -215,12 +211,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) @@ -232,7 +227,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -261,35 +257,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
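This SMATRIX1 hunk (it continues just below with the NGOOD bookkeeping) keeps helicity I only when DABS(TS(I)) exceeds ANS*LIMHEL/NCOMB. Together with the genps.inc and run_card.inc hunks earlier, LIMHEL becomes a run_card parameter instead of a compile-time constant; LIMHEL=0 keeps every helicity with a non-zero contribution, mimicking the cudacpp filter. A compact Python sketch of the criterion (names illustrative):

```python
# Hedged sketch of the good-helicity criterion used above: ts[i] is the
# matrix-element contribution of helicity combination i, ans their sum.
def good_helicities(ts, limhel=1e-8):
    ans = sum(ts)
    ncomb = len(ts)
    threshold = ans * limhel / ncomb
    return [i for i, t in enumerate(ts) if abs(t) > threshold]

print(good_helicities([0.0, 1e-30, 0.5, 0.5]))            # [2, 3]
print(good_helicities([0.0, 1e-30, 0.5, 0.5], limhel=0))  # [1, 2, 3]
```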
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -361,7 +355,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cluster.f b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cluster.f index 649e46f4e9..b8995283ed 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cluster.f +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/cluster.f @@ -552,6 +552,8 @@ logical function cluster(p, ivec) if (btest(mlevel,1)) $ write (*,*)'New event' + iwin = 0 + jwin = 0 cluster=.false. clustered=.false. do i=0,3 @@ -663,7 +665,8 @@ logical function cluster(p, ivec) c initialize graph storage igraphs(0)=0 nleft=nexternal -c cluster +c cluster + if (iwin.eq.0.or.jwin.eq.0) stop 21 do n=1,nexternal-2 c combine winner imocl(n)=imap(iwin,2)+imap(jwin,2) diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/proc_characteristics b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/proc_characteristics index 6f1aee2300..7d14ce32dc 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/proc_characteristics +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/proc_characteristics @@ -17,6 +17,8 @@ splitting_types = [] perturbation_order = [] limitations = [] + ew_sudakov = False hel_recycling = False single_color = False nlo_mixed_expansion = True + gauge = unitary diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/refine.sh b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/refine.sh index afb9b99ad1..b46170ba23 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/refine.sh +++ b/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/refine.sh @@ -57,7 +57,11 @@ j=%(directory)s for((try=1;try<=16;try+=1)); do if [ "$keeplog" = true ] ; then + if [[ -e ../madevent ]];then ../madevent 2>&1 >> $k &1 >> $k &1 >> log.txt &1 >> log.txt &1 >> $k diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/banner.py b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/banner.py --- a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/banner.py +++ b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/banner.py - pat_begin=re.compile('<(?P<name>\w*)>') - pat_end=re.compile('</(?P<name>\w*)>') + pat_begin=re.compile(r'<(?P<name>\w*)>') + pat_end=re.compile(r'</(?P<name>\w*)>') tag_to_file={'slha':'param_card.dat', 'mgruncard':'run_card.dat', @@ -319,7 +319,7 @@ def check_pid(self, pid2label): def get_lha_strategy(self): """get the lha_strategy: how the weight have to be handle by the shower""" - if not self["init"]: + if "init" not in self or not self["init"]: raise Exception("No init block define") data = self["init"].split('\n')[0].split() @@ -537,7 +537,8 @@ def charge_card(self, tag): self.param_card = param_card_reader.ParamCard(param_card) return self.param_card elif tag == 'mgruncard': - self.run_card = RunCard(self[tag], unknown_warning=False) + with misc.TMP_variable(RunCard, 'allow_scan', True): + self.run_card = 
RunCard(self[tag], consistency=False, unknow_warning=False) return self.run_card elif tag == 'mg5proccard': proc_card = self[tag].split('\n') @@ -976,6 +977,8 @@ class ConfigFile(dict): """ a class for storing/dealing with input file. """ + allow_scan = False + def __init__(self, finput=None, **opt): """initialize a new instance. input can be an instance of MadLoopParam, a file, a path to a file, or simply Nothing""" @@ -993,6 +996,7 @@ def __init__(self, finput=None, **opt): # Initialize it with all the default value self.user_set = set() self.auto_set = set() + self.scan_set = {} #key -> type of list (int/float/bool/str/... for scan self.system_only = set() self.lower_to_case = {} self.list_parameter = {} #key -> type of list (int/float/bool/str/... @@ -1109,6 +1113,8 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): #1. check if the parameter is set to auto -> pass it to special if lower_name in self: targettype = type(dict.__getitem__(self, lower_name)) + if lower_name in self.scan_set: + targettype = self.scan_set[lower_name] if targettype != str and isinstance(value, str) and value.lower() == 'auto': self.auto_set.add(lower_name) if lower_name in self.user_set: @@ -1118,13 +1124,26 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): return elif lower_name in self.auto_set: self.auto_set.remove(lower_name) - + + + #1. check if the parameter is set to auto -> pass it to special + scan_targettype = None + if self.allow_scan and isinstance(value, str) and value.strip().startswith('scan'): + if lower_name in self.user_set: + self.user_set.remove(lower_name) + self.scan_set[lower_name] = type(self[lower_name]) + dict.__setitem__(self, lower_name, value) + #keep old value. + self.post_set(lower_name,value, change_userdefine, raiseerror) + return + elif lower_name in self.scan_set: + scan_targettype = self.scan_set[lower_name] + del self.scan_set[lower_name] + # 2. Find the type of the attribute that we want if lower_name in self.list_parameter: targettype = self.list_parameter[lower_name] - - if isinstance(value, str): # split for each comma/space value = value.strip() @@ -1248,8 +1267,11 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): if change_userdefine: self.user_set.add(lower_name) return self.post_set(lower_name, None, change_userdefine, raiseerror) - elif name in self: - targettype = type(self[name]) + elif name in self: + if scan_targettype: + targettype = targettype + else: + targettype = type(self[name]) else: logger.debug('Trying to add argument %s in %s. ' % (name, self.__class__.__name__) +\ 'This argument is not defined by default. 
Please consider adding it.') @@ -1262,7 +1284,7 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): if change_userdefine: self.user_set.add(lower_name) return self.post_set(lower_name, None, change_userdefine, raiseerror) - + value = self.format_variable(value, targettype, name=name) #check that the value is allowed: if lower_name in self.allowed_value and '*' not in self.allowed_value[lower_name]: @@ -1444,7 +1466,7 @@ def format_variable(value, targettype, name="unknown"): value =int(value[:-1]) * convert[value[-1]] elif '/' in value or '*' in value: try: - split = re.split('(\*|/)',value) + split = re.split(r'(\*|/)',value) v = float(split[0]) for i in range((len(split)//2)): if split[2*i+1] == '*': @@ -1482,7 +1504,7 @@ def format_variable(value, targettype, name="unknown"): value = float(value) except ValueError: try: - split = re.split('(\*|/)',value) + split = re.split(r'(\*|/)',value) v = float(split[0]) for i in range((len(split)//2)): if split[2*i+1] == '*': @@ -1491,7 +1513,10 @@ def format_variable(value, targettype, name="unknown"): v /= float(split[2*i+2]) except: v=0 - raise InvalidCmd("%s can not be mapped to a float" % value) + if "scan" in value: + raise InvalidCmd("%s is not supported here. Note that scan command can not be present simultaneously in the run_card and param_card." % value) + else: + raise InvalidCmd("%s can not be mapped to a float" % value) finally: value = v else: @@ -1737,10 +1762,12 @@ def default_setup(self): self.add_param('splitting_types',[], typelist=str) self.add_param('perturbation_order', [], typelist=str) self.add_param('limitations', [], typelist=str) + self.add_param('ew_sudakov', False) self.add_param('hel_recycling', False) self.add_param('single_color', True) self.add_param('nlo_mixed_expansion', True) - + self.add_param('gauge', 'U') + def read(self, finput): """Read the input file, this can be a path to a file, a file object, a str with the content of the file.""" @@ -3104,7 +3131,7 @@ def write(self, output_file, template=None, python_template=False, # do not write hidden parameter not hidden for this template # if python_template: - written = written.union(set(re.findall('\%\((\w*)\)s', open(template,'r').read(), re.M))) + written = written.union(set(re.findall(r'\%\((\w*)\)s', open(template,'r').read(), re.M))) to_write = to_write.union(set(self.hidden_param)) to_write = to_write.difference(written) @@ -3157,7 +3184,6 @@ def get_value_from_include(self, path, list_of_params, output_dir): if path does not exists return the current value in self for all parameter""" #WARNING DOES NOT HANDLE LIST/DICT so far - misc.sprint(output_dir, path) # handle case where file is missing if not os.path.exists(pjoin(output_dir,path)): misc.sprint("include file not existing", pjoin(output_dir,path)) @@ -3259,7 +3285,7 @@ def edit_dummy_fct_from_file(self, filelist, outdir): text = open(path,'r').read() #misc.sprint(text) f77_type = ['real*8', 'integer', 'double precision', 'logical'] - pattern = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ + pattern = re.compile(r'^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ % {'type':'|'.join(f77_type)}, re.I+re.M) for fct in pattern.findall(text): fsock = file_writers.FortranWriter(tmp,'w') @@ -3287,6 +3313,7 @@ def edit_dummy_fct_from_file(self, filelist, outdir): starttext = open(pjoin(outdir, path+'.orig')).read() fsock.remove_routine(starttext, to_mod[path][0]) for text in to_mod[path][1]: + text = self.retro_compatible_custom_fct(text) 
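The format_variable hunks above keep accepting simple '*' and '/' arithmetic inside card entries (e.g. '2*3/4'), and the new branch distinguishes a genuine parse failure from a 'scan' value that reached a card where scanning is unsupported. A self-contained sketch of the operator folding (the helper name is invented):

```python
# Hedged re-implementation of the '*'/'/' folding done by
# ConfigFile.format_variable above: split on the operators, then
# apply them left to right.
import re

def eval_card_arithmetic(value):
    parts = re.split(r'(\*|/)', value)     # e.g. ['2', '*', '3', '/', '4']
    result = float(parts[0])
    for i in range(len(parts) // 2):
        op, operand = parts[2 * i + 1], float(parts[2 * i + 2])
        result = result * operand if op == '*' else result / operand
    return result

assert eval_card_arithmetic('2*3/4') == 1.5
```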
fsock.writelines(text) fsock.close() if not filecmp.cmp(pjoin(outdir, path), pjoin(outdir, path+'.tmp')): @@ -3303,7 +3330,33 @@ def edit_dummy_fct_from_file(self, filelist, outdir): files.mv(pjoin(outdir,path+'.orig'), pjoin(outdir, path)) + @staticmethod + def retro_compatible_custom_fct(lines, mode=None): + f77_type = ['real*8', 'integer', 'double precision', 'logical'] + function_pat = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ + % {'type':'|'.join(f77_type)}, re.I+re.M) + include_pat = re.compile(r"\s+include\s+[\'\"]([\w\./]*)") + + assert isinstance(lines, list) + sol = [] + + if mode is None or 'vector.inc' in mode: + search = True + for i,line in enumerate(lines[:]): + if search and re.search(include_pat, line): + name = re.findall(include_pat, line)[0] + misc.sprint('DETECTED INCLUDE', name) + if 'vector.inc' in name: + search = False + if 'run.inc' in name: + sol.append(" include 'vector.inc'") + search = False + sol.append(line) + if re.search(function_pat, line): + misc.sprint("DETECTED FCT") + search = True + return sol def guess_entry_fromname(self, name, value): """ @@ -3346,7 +3399,7 @@ def update_typelist(value, name, opts): #handle metadata opts = {} forced_opts = [] - for key,val in re.findall("\<(?P<name>[_\-\w]+)\=(?P<value>[^>]*)\>", str(name)): + for key,val in re.findall(r"\<(?P<name>[_\-\w]+)\=(?P<value>[^>]*)\>", str(name)): forced_opts.append(key) if val in ['True', 'False']: opts[key] = eval(val) @@ -3681,11 +3734,22 @@ def write_autodef(self, output_dir, output_file=None): out = ["%s\n" %l for l in out] fsock.writelines(out) - @staticmethod - def get_idbmup(lpp): + def get_idbmup(self, lpp, beam=1): """return the particle colliding pdg code""" if lpp in (1,2, -1,-2): - return math.copysign(2212, lpp) + target = 2212 + if 'nb_proton1' in self: + nbp = self['nb_proton%s' % beam] + nbn = self['nb_neutron%s' % beam] + if nbp == 1 and nbn ==0: + target = 2212 + elif nbp==0 and nbn ==1: + target = 2112 + else: + target = 1000000000 + target += 10 * (nbp+nbn) + target += 10000 * nbp + return math.copysign(target, lpp) elif lpp in (3,-3): return math.copysign(11, lpp) elif lpp in (4,-4): @@ -3701,8 +3765,8 @@ def get_banner_init_information(self): the first line of the <init> block of the lhe file.""" output = {} - output["idbmup1"] = self.get_idbmup(self['lpp1']) - output["idbmup2"] = self.get_idbmup(self['lpp2']) + output["idbmup1"] = self.get_idbmup(self['lpp1'], beam=1) + output["idbmup2"] = self.get_idbmup(self['lpp2'], beam=2) output["ebmup1"] = self["ebeam1"] output["ebmup2"] = self["ebeam2"] output["pdfgup1"] = 0 @@ -3959,7 +4023,8 @@ def check_validity(self, card): dict.__setitem__(card, 'pdlabel1', card['pdlabel']) dict.__setitem__(card, 'pdlabel2', card['pdlabel']) - if abs(card['lpp1']) == 1 == abs(card['lpp2']) and card['pdlabel1'] != card['pdlabel2']: + if isinstance(card['lpp1'],int) and isinstance(card['lpp2'],int) and \ + abs(card['lpp1']) == 1 == abs(card['lpp2']) and card['pdlabel1'] != card['pdlabel2']: raise InvalidRunCard("Assymetric beam pdf not supported for proton-proton collision") def status(self, card): @@ -4156,12 +4221,16 @@ def default_setup(self): self.add_param('frame_id', 6, system=True) self.add_param("event_norm", "average", allowed=['sum','average', 'unity'], include=False, sys_default='sum', hidden=True) + self.add_param("keep_log", "normal", include=False, hidden=True, + comment="none: all log send to /dev/null.\n minimal: keep only log for survey of the last run.\n normal: keep only log for survey of all run. 
\n debug: keep all log (survey and refine)", + allowed=['none', 'minimal', 'normal', 'debug']) #cut self.add_param("auto_ptj_mjj", True, hidden=True) self.add_param("bwcutoff", 15.0) self.add_param("cut_decays", False, cut='d') self.add_param('dsqrt_shat',0., cut=True) self.add_param("nhel", 0, include=False) + self.add_param("limhel", 1e-8, hidden=True, comment="threshold to determine if an helicity contributes when not MC over helicity.") #pt cut self.add_param("ptj", 20.0, cut='j') self.add_param("ptb", 0.0, cut='b') @@ -4842,7 +4911,7 @@ def create_default_for_process(self, proc_characteristic, history, proc_def): # here pick strategy 2 if only one QCD color flow # and for pure multi-jet case jet_id = [21] + list(range(1, self['maxjetflavor']+1)) - if proc_characteristic['single_color']: + if proc_characteristic['gauge'] != 'FD' and proc_characteristic['single_color']: self['sde_strategy'] = 2 #for pure lepton final state go back to sde_strategy=1 pure_lepton=True @@ -5741,9 +5810,10 @@ def check_validity(self): # check that ebeam is bigger than the proton mass. for i in [1,2]: - if self['lpp%s' % i ] not in [1,2]: + # do not for proton mass if not proton PDF (or when scan initialization) + if self['lpp%s' % i ] not in [1,2] or isinstance(self['ebeam%i' % i], str): continue - + if self['ebeam%i' % i] < 0.938: if self['ebeam%i' %i] == 0: logger.warning("At-rest proton mode set: energy beam set to 0.938 GeV") @@ -6193,3 +6263,181 @@ def log_and_update(self, banner, card, par, v): xcard = banner.charge_card(card) xcard[par[0]].param_dict[(par[1],)].value = v xcard.write(os.path.join(self.me_dir, 'Cards', '%s.dat' % card)) + + + + +class RunCardIterator(object): + """A class keeping track of the scan: flag in the param_card and + having an __iter__() function to scan over all the points of the scan. + """ + + logging = True + def __init__(self, input_path=None): + with misc.TMP_variable(RunCard, 'allow_scan', True): + self.run_card = RunCard(input_path, consistency=False) + self.run_card.allow_scan = True + + self.itertag = [] #all the current value use + self.cross = [] # keep track of all the cross-section computed + self.param_order = [] + + def __iter__(self): + """generate the next param_card (in a abstract way) related to the scan. + Technically this generates only the generator.""" + + if hasattr(self, 'iterator'): + return self.iterator + self.iterator = self.iterate() + return self.iterator + + def write(self, path): + self.__iter__.write(path) + + def next(self, autostart=False): + """call the next iteration value""" + try: + iterator = self.iterator + except: + if autostart: + iterator = self.__iter__() + else: + raise + try: + out = next(iterator) + except StopIteration: + del self.iterator + raise + return out + + def iterate(self): + """create the actual generator""" + all_iterators = {} # dictionary of key -> block of object to scan [([param, [values]), ...] 
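RunCardIterator.iterate(), which continues below, parses entries of the form 'scan<id>: <list>': directives sharing an explicit integer tag are varied in lockstep, while untagged ones each get a private key and vary independently through itertools.product. A hedged sketch of that grouping (simplified input shape; the eval of the list literal mirrors the source):

```python
# Illustrative sketch of the scan grouping in RunCardIterator.iterate below.
import itertools
import re

SCAN = re.compile(r'scan\s*(?P<id>\d*)\s*:\s*(?P<value>[^#]*)', re.I)

def scan_points(card):
    """card: {parameter: 'scan...: [...]'} -- an illustrative input shape."""
    groups = {}
    for param, raw in card.items():
        key, values = SCAN.findall(raw)[0]
        key = key or '-%d' % len(groups)       # untagged -> its own group
        groups.setdefault(key, []).append((param, eval(values)))
    keys = list(groups)
    lengths = [range(len(groups[k][0][1])) for k in keys]
    for positions in itertools.product(*lengths):
        yield {param: vals[pos]
               for key, pos in zip(keys, positions)
               for param, vals in groups[key]}

for point in scan_points({'ebeam1': 'scan1: [3000, 6500]',
                          'ebeam2': 'scan1: [3000, 6500]',
                          'ptj': 'scan: [10, 20, 30]'}):
    print(point)   # 2 (linked ebeam values) x 3 (ptj values) = 6 cards
```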
+ pattern = re.compile(r'''scan\s*(?P<id>\d*)\s*:\s*(?P<value>[^#]*)''', re.I) + + # fill all_iterators with the run_card information + for name in self.run_card.scan_set: + value = self.run_card[name] + try: + key, def_list = pattern.findall(value)[0] + except Exception as error: + misc.sprint(error) + raise Exception("Fail to handle scanning tag in run_card: Please check that the syntax is valid") + if key == '': + key = -1 * len(all_iterators) + if key not in all_iterators: + all_iterators[key] = [] + try: + all_iterators[key].append( (name, eval(def_list))) + except SyntaxError as error: + raise Exception("Fail to handle your scan definition. Please check your syntax:\n entry: %s \n Error reported: %s" %(def_list, error)) + + #prepare to keep track of parameter changing for the report + keys = list(all_iterators.keys()) # need to fix an order for the scan + #store the type of parameter + for key in keys: + for param, values in all_iterators[key]: + self.param_order.append("run_card#%s" % (param)) + + # do the loop + lengths = [list(range(len(all_iterators[key][0][1]))) for key in keys] + from functools import reduce + total = reduce((lambda x, y: x * y),[len(x) for x in lengths]) + for i,positions in enumerate(itertools.product(*lengths)): + self.itertag = [] + if self.logging: + logger.info("Create the next run_card in the scan definition (%s/%s) " %( i+1, total), '$MG:BOLD') + for i, pos in enumerate(positions): + key = keys[i] + for param, values in all_iterators[key]: + # assign the value in the card. + self.run_card[param] = values[pos] + self.itertag.append(values[pos]) + if self.logging: + logger.info("change parameter %s to %s", \ + param, values[pos]) + + + # retrun the current param_card up to next iteration + yield self.run_card + + + def store_entry(self, run_name, cross, error=None, run_card_path=None): + """store the value of the cross-section""" + + if isinstance(cross, dict): + info = dict(cross) + info.update({'bench' : self.itertag, 'run_name': run_name}) + self.cross.append(info) + else: + if error is None: + self.cross.append({'bench' : self.itertag, 'run_name': run_name, 'cross(pb)':cross}) + else: + self.cross.append({'bench' : self.itertag, 'run_name': run_name, 'cross(pb)':cross, 'error(pb)':error}) + + + def write_summary(self, path, order=None, lastline=False, nbcol=20): + """ """ + + if path: + ff = open(path, 'w') + path_events = path.rsplit("/", 1)[0] + #identCard = open(pjoin(path.rsplit("/", 2)[0], "Cards", "ident_card.dat")) + #identLines = identCard.readlines() + #identCard.close() + else: + ff = StringIO.StringIO() + if order: + keys = order + else: + keys = list(self.cross[0].keys()) + if 'bench' in keys: keys.remove('bench') + if 'run_name' in keys: keys.remove('run_name') + keys.sort() + if 'cross(pb)' in keys: + keys.remove('cross(pb)') + keys.append('cross(pb)') + if 'error(pb)' in keys: + keys.remove('error(pb)') + keys.append('error(pb)') + + formatting = "#%s%s%s\n" %('%%-%is ' % (nbcol-1), ('%%-%is ' % (nbcol))* len(self.param_order), + ('%%-%is ' % (nbcol))* len(keys)) + # header + if not lastline: + ff.write(formatting % tuple(['run_name'] + self.param_order + keys)) + formatting = "%s%s%s\n" %('%%-%is ' % (nbcol), ('%%-%ie ' % (nbcol))* len(self.param_order), + ('%%-%ie ' % (nbcol))* len(keys)) + + if not lastline: + to_print = self.cross + else: + to_print = self.cross[-1:] + for info in to_print: + name = info['run_name'] + bench = info['bench'] + data = [] + for k in keys: + if k in info: + data.append(info[k]) + else: + data.append(0.) 
+ ff.write(formatting % tuple([name] + bench + data)) + ff_single = open(pjoin(path_events, name, "params.dat"), "w") + for i_bench in range(0, len(bench)): + ff_single.write(self.param_order[i_bench] + " = " + str(bench[i_bench]) +"\n") + ff_single.close() + + if not path: + return ff.getvalue() + + + def get_next_name(self, run_name): + """returns a smart name for the next run""" + + if '_' in run_name: + name, value = run_name.rsplit('_',1) + if value.isdigit(): + return '%s_%02i' % (name, float(value)+1) + # no valid '_' in the name + return '%s_scan_02' % run_name diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/check_param_card.py b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/check_param_card.py index 71089d7480..bc785b5de6 100755 --- a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/check_param_card.py +++ b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/check_param_card.py @@ -649,7 +649,7 @@ def write_inc_file(self, outpath, identpath, default, need_mp=False): #check if we need to write the value of scale for some block if os.path.exists(input_inc): text = open(input_inc).read() - scales = list(set(re.findall('mdl__(\w*)__scale', text, re.I))) + scales = list(set(re.findall(r'mdl__(\w*)__scale', text, re.I))) else: scales = [] @@ -1000,10 +1000,12 @@ def iterate(self): self.param_order.append("%s#%s" % (param.lhablock, '_'.join(repr(i) for i in param.lhacode))) # do the loop lengths = [list(range(len(all_iterators[key][0][1]))) for key in keys] - for positions in itertools.product(*lengths): + from functools import reduce + total = reduce((lambda x, y: x * y),[len(x) for x in lengths]) + for i,positions in enumerate(itertools.product(*lengths)): self.itertag = [] if self.logging: - logger.info("Create the next param_card in the scan definition", '$MG:BOLD') + logger.info("Create the next param_card in the scan definition (%s/%s)" % (i+1,total), '$MG:BOLD') for i, pos in enumerate(positions): key = keys[i] for param, values in all_iterators[key]: diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/cluster.py b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/cluster.py index 9a893f630d..1ad860e04f 100755 --- a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/cluster.py +++ b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/cluster.py @@ -646,7 +646,10 @@ def worker(self): if os.path.exists(exe) and not exe.startswith('/'): exe = './' + exe if isinstance(opt['stdout'],str): - opt['stdout'] = open(opt['stdout'],'w') + if opt['stdout'] == '/dev/null': + opt['stdout'] = os.open(os.devnull, os.O_RDWR) + else: + opt['stdout'] = open(opt['stdout'],'w') if opt['stderr'] == None: opt['stderr'] = subprocess.STDOUT if arg: @@ -671,11 +674,12 @@ def worker(self): self.pids.put(pid) # the function should return 0 if everything is fine # the error message otherwise - returncode = exe(*arg, **opt) - if returncode != 0: - logger.warning("fct %s does not return 0. Stopping the code in a clean way. The error was:\n%s", exe, returncode) + try: + returncode = exe(*arg, **opt) + except Exception as error: + #logger.warning("fct %s does not return 0. Stopping the code in a clean way. 
The error was:\n%s", exe, returncode) self.stoprequest.set() - self.remove("fct %s does not return 0:\n %s" % (exe, returncode)) + self.remove("fct %s does raise %s\n %s" % (exe, error)) except Exception as error: self.fail_msg = sys.exc_info() logger.warning(str(error)) @@ -700,7 +704,7 @@ def worker(self): def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, - log=None, required_output=[], nb_submit=0): + log=None, required_output=[], nb_submit=0, python_opts={}): """submit a job on multicore machine""" # open threads if needed @@ -720,7 +724,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, return tag else: # python function - self.queue.put((tag, prog, argument, {})) + self.queue.put((tag, prog, argument, python_opts)) self.submitted.put(1) return tag @@ -908,6 +912,10 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None else: requirement = '' + if 'cluster_walltime' in self.options and self.options['cluster_walltime']\ + and self.options['cluster_walltime'] != 'None': + requirement+='\n MaxRuntime = %s' % self.options['cluster_walltime'] + if cwd is None: cwd = os.getcwd() if stdout is None: @@ -936,7 +944,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None #Submitting job(s). #Logging submit event(s). #1 job(s) submitted to cluster 2253622. - pat = re.compile("submitted to cluster (\d*)",re.MULTILINE) + pat = re.compile(r"submitted to cluster (\d*)",re.MULTILINE) output = output.decode(errors='ignore') try: id = pat.search(output).groups()[0] @@ -1025,7 +1033,7 @@ def submit2(self, prog, argument=[], cwd=None, stdout=None, stderr=None, #Logging submit event(s). #1 job(s) submitted to cluster 2253622. output = output.decode(errors='ignore') - pat = re.compile("submitted to cluster (\d*)",re.MULTILINE) + pat = re.compile(r"submitted to cluster (\d*)",re.MULTILINE) try: id = pat.search(output).groups()[0] except: @@ -1588,7 +1596,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None output = a.communicate()[0].decode(errors='ignore') #Your job 874511 ("test.sh") has been submitted - pat = re.compile("Your job (\d*) \(",re.MULTILINE) + pat = re.compile(r"Your job (\d*) \(",re.MULTILINE) try: id = pat.search(output).groups()[0] except: @@ -1606,7 +1614,7 @@ def control_one_job(self, id): if not status: return 'F' #874516 0.00000 test.sh alwall qw 03/04/2012 22:30:35 1 - pat = re.compile("^(\d+)\s+[\d\.]+\s+[\w\d\.]+\s+[\w\d\.]+\s+(\w+)\s") + pat = re.compile(r"^(\d+)\s+[\d\.]+\s+[\w\d\.]+\s+[\w\d\.]+\s+(\w+)\s") stat = '' for line in status.stdout.read().decode(errors='ignore').split('\n'): if not line: @@ -1636,7 +1644,7 @@ def control(self, me_dir=None): cmd = 'qstat -s %s' % statusflag status = misc.Popen([cmd], shell=True, stdout=subprocess.PIPE) #874516 0.00000 test.sh alwall qw 03/04/2012 22:30:35 1 - pat = re.compile("^(\d+)") + pat = re.compile(r"^(\d+)") for line in status.stdout.read().decode(errors='ignore').split('\n'): line = line.strip() try: @@ -1715,6 +1723,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None stderr = stdout if log is None: log = '/dev/null' + command = ['sbatch', '-o', stdout, '-J', me_dir, @@ -1726,6 +1735,12 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None command.insert(1, '-p') command.insert(2, self.cluster_queue) + if 'cluster_walltime' in self.options and self.options['cluster_walltime']\ + and self.options['cluster_walltime'] != 'None': + 
command.insert(1, '-t') + command.insert(2, self.options['cluster_walltime']) + + a = misc.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, @@ -1736,7 +1751,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None id = output_arr[3].rstrip() if not id.isdigit(): - id = re.findall('Submitted batch job ([\d\.]+)', ' '.join(output_arr)) + id = re.findall(r'Submitted batch job ([\d\.]+)', ' '.join(output_arr)) if not id or len(id)>1: raise ClusterManagmentError( 'fail to submit to the cluster: \n%s' \ diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/combine_runs.py b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/combine_runs.py index 4de6b84ec0..b1e8c88eac 100755 --- a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/combine_runs.py +++ b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/combine_runs.py @@ -20,6 +20,7 @@ from __future__ import absolute_import import math import os +import shutil import re import logging from six.moves import range @@ -117,6 +118,7 @@ def sum_multichannel(self, channel): #Now read in all of the events and write them #back out with the appropriate scaled weight + to_clean = [] fsock = open(pjoin(channel, 'events.lhe'), 'w') wgt = results.axsec / results.nunwgt tot_nevents, nb_file = 0, 0 @@ -129,8 +131,14 @@ def sum_multichannel(self, channel): nw = self.copy_events(fsock, pjoin(path,'events.lhe'), wgt) tot_nevents += nw nb_file += 1 + to_clean.append(path) logger.debug("Combined %s file generating %s events for %s " , nb_file, tot_nevents, channel) - + for path in to_clean: + try: + shutil.rmtree(path) + except Exception as error: + pass + @staticmethod def get_fortran_str(nb): data = '%E' % nb @@ -162,6 +170,7 @@ def copy_events(self, fsock, input, new_wgt): fsock.write(line) old_line = line return nb_evt + def get_channels(self, proc_path): """Opens file symfact.dat to determine all channels""" sympath = os.path.join(proc_path, 'symfact.dat') diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/common_run_interface.py index 9bd9d9cb50..194f0cdfbd 100755 --- a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/common_run_interface.py @@ -23,6 +23,7 @@ import ast import logging import math +import copy import os import re import shutil @@ -181,6 +182,23 @@ def help_add_time_of_flight(self): logger.info(' threshold option allows to change the minimal value required to') logger.info(' a non zero value for the particle (default:1e-12s)') + def help_print_results(self): + logger.info("syntax: print_results [RUN_NAME] [OPTIONS]") + logger.info("-- print the results of the previous run on the screen") + logger.info(" If not RUN_NAME is provided, the information of all run") + logger.info(" are printed one after another.") + logger.info("") + logger.info(" supported options:") + logger.info(" ------------------") + logger.info(" --format=full|short # default is full") + logger.info(" full format contains banner/... 
") + logger.info(" while short is a simple multi-column format (nice for plotting)") + logger.info(" --path=") + logger.info(" allow to write the information to a file.") + logger.info(" --mode=w|a #default is w ") + logger.info(" when the information is printed to a file, you can choose ") + logger.info(" to either overwrite the file if already exists (w mode)") + logger.info(" to append the information at the end of the file (a mode)") class CheckValidForCmd(object): @@ -727,7 +745,7 @@ def __init__(self, me_dir, options, *args, **opts): if not self.proc_characteristics['ninitial']: # Get number of initial states nexternal = open(pjoin(self.me_dir,'Source','nexternal.inc')).read() - found = re.search("PARAMETER\s*\(NINCOMING=(\d)\)", nexternal) + found = re.search(r"PARAMETER\s*\(NINCOMING=(\d)\)", nexternal) self.ninitial = int(found.group(1)) else: self.ninitial = self.proc_characteristics['ninitial'] @@ -989,7 +1007,22 @@ def do_treatcards(self, line, amcatnlo=False): #raise Exception, "%s %s %s" % (sys.path, os.path.exists(pjoin(self.me_dir,'bin','internal', 'ufomodel')), os.listdir(pjoin(self.me_dir,'bin','internal', 'ufomodel'))) import ufomodel as ufomodel zero = ufomodel.parameters.ZERO - if self.proc_characteristics['nlo_mixed_expansion']: + no_width = [] + + if self.proc_characteristics['ew_sudakov']: + # if the sudakov approximation is used, force all particle widths to zero + # unless the complex mass scheme is used + if not self.proc_characteristics['complex_mass_scheme']: + no_width = [p for p in ufomodel.all_particles if p.width != zero] + logger.info('''Setting all particle widths to zero (needed for EW Sudakov approximation).''','$MG:BOLD') + # also, check that the model features the 'ntadpole' parameter, and set it to 1 + try: + param_card['tadpole'].get(1).value = 1. + logger.info('''Setting the value of ntadpole to 1 (needed for EW Sudakov approximation).''','$MG:BOLD') + except KeyError: + logger.warning('''The model has no 'ntadpole' parameter. 
The Sudakov approximation for EW corrections may give wrong results.''') + + elif self.proc_characteristics['nlo_mixed_expansion']: no_width = [p for p in ufomodel.all_particles if (str(p.pdg_code) in pids or str(-p.pdg_code) in pids) and p.width != zero] @@ -1168,17 +1201,17 @@ def detect_card_type(path): 'Begin Minpts', 'gridpack', 'ebeam1', - 'block\s+mw_run', + r'block\s+mw_run', 'BLOCK', 'DECAY', 'launch', 'madspin', - 'transfer_card\.dat', + r'transfer_card\.dat', 'set', 'main:numberofevents', # pythia8, '@MG5aMC skip_analysis', #MA5 --both-- - '@MG5aMC\s*inputs\s*=\s*\*\.(?:hepmc|lhe)', #MA5 --both-- - '@MG5aMC\s*reconstruction_name', # MA5 hadronique + r'@MG5aMC\s*inputs\s*=\s*\*\.(?:hepmc|lhe)', #MA5 --both-- + r'@MG5aMC\s*reconstruction_name', # MA5 hadronique '@MG5aMC', # MA5 hadronique 'run_rivet_later', # Rivet ] @@ -1237,7 +1270,7 @@ def detect_card_type(path): return 'madspin_card.dat' if 'decay' in text: # need to check if this a line like "decay w+" or "set decay" - if re.search("(^|;)\s*decay", fulltext, re.M): + if re.search(r"(^|;)\s*decay", fulltext, re.M): return 'madspin_card.dat' else: return 'reweight_card.dat' @@ -2074,6 +2107,12 @@ def check_multicore(self): # ensure that the run_card is present if not hasattr(self, 'run_card'): self.run_card = banner_mod.RunCard(pjoin(self.me_dir, 'Cards', 'run_card.dat')) + # Below reads the run_card in the LHE file rather than the Cards/run_card + import madgraph.various.lhe_parser as lhe_parser + args_path = list(args) + self.check_decay_events(args_path) + self.run_card = banner_mod.RunCard(lhe_parser.EventFile(args_path[0]).get_banner()['mgruncard']) + # we want to run this in a separate shell to avoid hard f2py crash command = [sys.executable] @@ -2085,6 +2124,12 @@ def check_multicore(self): command.append('--web') command.append('reweight') + ## TV: copy the event file as backup before starting reweighting + event_path = pjoin(self.me_dir, 'Events', self.run_name, 'events.lhe.gz') + event_path_backup = pjoin(self.me_dir, 'Events', self.run_name, 'events_orig.lhe.gz') + if os.path.exists(event_path) and not os.path.exists(event_path_backup): + shutil.copyfile(event_path, event_path_backup) + ######### START SINGLE CORE MODE ############ if self.options['nb_core']==1 or self.run_card['nevents'] < 101 or not check_multicore(self): if self.run_name: @@ -2200,7 +2245,7 @@ def check_multicore(self): cross_sections[key] = value / (nb_event+1) lhe.remove() for key in cross_sections: - if key == 'orig' or key.isdigit(): + if key == 'orig' or (key.isdigit() and not (key[0] == '2')): continue logger.info('%s : %s pb' % (key, cross_sections[key])) return @@ -2461,7 +2506,7 @@ def do_add_time_of_flight(self, line): ############################################################################ def do_print_results(self, line): - """Not in help:Print the cross-section/ number of events for a given run""" + """Print the cross-section/ number of events for a given run""" args = self.split_arg(line) options={'path':None, 'mode':'w', 'format':'full'} @@ -2942,10 +2987,15 @@ def do_rivet(self, line, postprocess=False): #2 Prepare Rivet setup environments rivet_path = self.options['rivet_path'] yoda_path = self.options['yoda_path'] + fastjet_path = subprocess.Popen([self.options['fastjet'], '--prefix'], + stdout = subprocess.PIPE).stdout.read().decode(errors='ignore').strip() + set_env = set_env + "export PATH={0}:$PATH\n".format(pjoin(rivet_path, 'bin')) set_env = set_env + "export PATH={0}:$PATH\n".format(pjoin(yoda_path, 'bin')) set_env = 
set_env + "export LD_LIBRARY_PATH={0}:{1}:$LD_LIBRARY_PATH\n".format(pjoin(rivet_path, 'lib'), pjoin(rivet_path, 'lib64')) set_env = set_env + "export LD_LIBRARY_PATH={0}:{1}:$LD_LIBRARY_PATH\n".format(pjoin(yoda_path, 'lib'), pjoin(yoda_path, 'lib64')) + set_env = set_env + "export LD_LIBRARY_PATH={0}:$LD_LIBRARY_PATH\n".format(pjoin(self.options['hepmc_path'], 'lib')) + set_env = set_env + "export LD_LIBRARY_PATH={0}:$LD_LIBRARY_PATH\n".format(pjoin(fastjet_path, 'lib')) major, minor = sys.version_info[0:2] set_env = set_env + "export PYTHONPATH={0}:{1}:$PYTHONPATH\n".format(pjoin(rivet_path, 'lib', 'python%s.%s' %(major,minor), 'site-packages'),\ pjoin(rivet_path, 'lib64', 'python%s.%s' %(major,minor), 'site-packages')) @@ -4311,8 +4361,8 @@ def complete_compute_widths(self, text, line, begidx, endidx, formatting=True): else: completion = {} completion['options'] = self.list_completion(text, - ['--path=', '--output=', '--min_br=0.\$', '--nlo', - '--precision_channel=0.\$', '--body_decay=']) + ['--path=', '--output=', r'--min_br=0.\$', '--nlo', + r'--precision_channel=0.\$', '--body_decay=']) return self.deal_multiple_categories(completion, formatting) @@ -4821,9 +4871,9 @@ class AskforEditCard(cmd.OneLinePathCompletion): (return False to repeat the question) """ - all_card_name = ['param_card', 'run_card', 'pythia_card', 'pythia8_card', + all_card_name = ['param_card', 'run_card', 'pythia_card', 'pythia8_card', 'fo_analysis_card' 'madweight_card', 'MadLoopParams', 'shower_card', 'rivet_card'] - to_init_card = ['param', 'run', 'madweight', 'madloop', + to_init_card = ['param', 'run', 'madweight', 'madloop', 'fo_analysis', 'shower', 'pythia8','delphes','madspin', 'rivet'] special_shortcut = {} special_shortcut_help = {} @@ -4853,6 +4903,7 @@ def load_default(self): self.has_PY8 = False self.has_delphes = False self.has_rivet = False + self.has_fo_card = False self.paths = {} self.update_block = [] @@ -5058,7 +5109,8 @@ def init_run(self, cards): try: - self.run_card = banner_mod.RunCard(self.paths['run'], consistency='warning') + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + self.run_card = banner_mod.RunCard(self.paths['run'], consistency='warning') except IOError: self.run_card = {} try: @@ -5248,6 +5300,15 @@ def init_delphes(self, cards): self.has_delphes = True return [] + def init_fo_analysis(self, cards): + self.has_fo_card = False + if not self.get_path('FO_analyse', cards): + return [] + self.has_fo_card = True + self.fo_card = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse']) + self.fo_card_def = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse_default']) + return list(self.fo_card.string_vars) + def set_CM_velocity(self, line): """compute sqrts from the velocity in the center of mass frame""" @@ -5508,6 +5569,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed['delphes_card'] = '' if self.has_rivet: allowed['rivet_card'] = '' + if self.has_fo_card: + allowed['fo_card'] = '' elif len(args) == 2: if args[1] == 'run_card': @@ -5532,6 +5595,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed = {'delphes_card':'default'} elif args[1] == 'rivet_card': allowed = {'rivet_card':'default'} + elif args[1] == 'fo_card': + allowed = {'fo_card':'default'} else: allowed = {'value':''} @@ -5539,6 +5604,7 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): start = 1 if args[1] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', 
'MadLoop_card','pythia8_card','delphes_card','plot_card', + 'fo_card', 'madanalysis5_parton_card','madanalysis5_hadron_card', 'rivet_card']: start = 2 @@ -5576,6 +5642,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): categories.append('delphes_card') if self.has_rivet: categories.append('rivet_card') + if self.has_fo_card: + categories.append('fo_card') possibilities['category of parameter (optional)'] = \ self.list_completion(text, categories) @@ -5630,7 +5698,13 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): if 'delphes_card' in allowed: if allowed['delphes_card'] == 'default': opts = ['default', 'atlas', 'cms'] - possibilities['Delphes Card'] = self.list_completion(text, opts) + possibilities['Delphes Card'] = self.list_completion(text, opts) + + if 'fo_card' in allowed: + opts = self.fo_card.string_vars + if allowed['fo_card'] == 'default': + opts.append('default') + possibilities['FO Card'] = self.list_completion(text, opts) if 'value' in list(allowed.keys()): opts = ['default', 'scale'] @@ -5733,21 +5807,28 @@ def do_set(self, line): if args[0] in self.special_shortcut: targettypes , cmd = self.special_shortcut[args[0]] if len(args) != len(targettypes) +1: - logger.warning('shortcut %s requires %s argument' % (args[0], len(targettypes))) - if len(args) < len(targettypes) +1: - return + if len(targettypes) == 1 and args[len(targettypes)].startswith('scan'): + args = args[:len(targettypes)] + [' '.join(args[len(targettypes):])] + targettypes = [str] else: - logger.warning('additional argument will be ignored') + logger.warning('shortcut %s requires %s argument' % (args[0], len(targettypes))) + if len(args) < len(targettypes) +1: + return + else: + logger.warning('additional argument will be ignored') values ={} for i, argtype in enumerate(targettypes): try: - values = {str(i): banner_mod.ConfigFile.format_variable(args[i+1], argtype, args[0])} + values[str(i)] = banner_mod.ConfigFile.format_variable(args[i+1], argtype, args[0]) except ValueError as e: logger.warning("Wrong argument: The entry #%s should be of type %s.", i+1, argtype) return except InvalidCmd as e: - logger.warning(str(e)) - return + if isinstance(args[i+1], str) and args[i+1].startswith('scan'): + values[str(i)] = args[i+1] + else: + logger.warning(str(e)) + return #else: # logger.warning("too many argument for this command") # return @@ -5787,7 +5868,7 @@ def do_set(self, line): if os.path.exists(pythia_path): logger.info('add line QCUT = %s in pythia_card.dat' % args[1]) p_card = open(pythia_path,'r').read() - p_card, n = re.subn('''^\s*QCUT\s*=\s*[\de\+\-\.]*\s*$''', + p_card, n = re.subn(r'''^\s*QCUT\s*=\s*[\de\+\-\.]*\s*$''', ''' QCUT = %s ''' % args[1], \ p_card, flags=(re.M+re.I)) if n==0: @@ -5801,7 +5882,7 @@ def do_set(self, line): if os.path.exists(pythia_path): logger.info('add line SHOWERKT = %s in pythia_card.dat' % args[1].upper()) p_card = open(pythia_path,'r').read() - p_card, n = re.subn('''^\s*SHOWERKT\s*=\s*[default\de\+\-\.]*\s*$''', + p_card, n = re.subn(r'''^\s*SHOWERKT\s*=\s*[default\de\+\-\.]*\s*$''', ''' SHOWERKT = %s ''' % args[1].upper(), \ p_card, flags=(re.M+re.I)) if n==0: @@ -5856,7 +5937,7 @@ def do_set(self, line): pjoin(self.me_dir,'Cards', 'delphes_card.dat')) return - if args[0] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', + if args[0] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', 'fo_card', 'delphes_card','madanalysis5_hadron_card','madanalysis5_parton_card','rivet_card']: if args[1] == 'default': 
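
# The shortcut handling in the do_set hunk above now tolerates scan syntax; a
# condensed sketch of that parsing (format_variable stands in for
# banner_mod.ConfigFile.format_variable; error handling simplified):

def parse_shortcut_args(args, targettypes, format_variable):
    # keep "scan:[...]" values verbatim so the run-card iterator can expand
    # them later, instead of failing the usual type conversion
    if len(args) != len(targettypes) + 1:
        if len(targettypes) == 1 and args[1].startswith('scan'):
            # the splitter broke "scan:[0.1, 0.2]" on spaces: re-glue it
            args = [args[0], ' '.join(args[1:])]
            targettypes = [str]
        else:
            raise ValueError('shortcut %s requires %s argument(s)'
                             % (args[0], len(targettypes)))
    values = {}
    for i, argtype in enumerate(targettypes):
        value = args[i + 1]
        if isinstance(value, str) and value.startswith('scan'):
            values[str(i)] = value  # deferred to the scan iterator
        else:
            values[str(i)] = format_variable(value, argtype, args[0])
    return values
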
@@ -6176,6 +6257,22 @@ def do_set(self, line): self.setRivet(args[start], value, default=default) self.rivet_card.write(self.paths['rivet'], self.paths['rivet_default']) + elif self.has_fo_card and (card in ['', 'fo_card'])\ + and args[start].lower() in [k.lower() for k in self.fo_card.string_vars]: + + if args[start] in self.conflict and card == '': + text = 'ambiguous name (present in more than one card). Please specify which card to edit' + logger.warning(text) + return + if args[start+1] == 'default': + value = self.fo_card_default[args[start]] + default = True + else: + value = args[start+1] + default = False + self.fo_card[args[start]] = value + self.modified_card.add('fo_card') + #INVALID -------------------------------------------------------------- else: logger.warning('invalid set command %s ' % line) @@ -6222,12 +6319,13 @@ def setM(self, block, name, value): def setR(self, name, value): - if self.mother_interface.inputfile: - self.run_card.set(name, value, user=True, raiseerror=True) - else: - self.run_card.set(name, value, user=True) - new_value = self.run_card.get(name) - logger.info('modify parameter %s of the run_card.dat to %s' % (name, new_value),'$MG:BOLD') + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + if self.mother_interface.inputfile: + self.run_card.set(name, value, user=True, raiseerror=True) + else: + self.run_card.set(name, value, user=True) + new_value = self.run_card.get(name) + logger.info('modify parameter %s of the run_card.dat to %s' % (name, new_value),'$MG:BOLD') def setML(self, name, value, default=False): @@ -6314,6 +6412,7 @@ def check_card_consistency(self): proc_charac = self.mother_interface.proc_characteristics if proc_charac['grouped_matrix'] and \ + isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int) and \ abs(self.run_card['lpp1']) == 1 == abs(self.run_card['lpp2']) and \ (self.run_card['nb_proton1'] != self.run_card['nb_proton2'] or self.run_card['nb_neutron1'] != self.run_card['nb_neutron2'] or @@ -6403,41 +6502,42 @@ def check_card_consistency(self): # check that only quark/gluon/photon are in initial beam if lpp=+-1 pdg_in_p = list(range(-6,7))+[21,22] - if (abs(self.run_card['lpp1'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial1'])) \ + if isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int): + if(abs(self.run_card['lpp1'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial1'])) \ or (abs(self.run_card['lpp2'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial2'])): - if 'pythia_card.dat' in self.cards or 'pythia8_card.dat' in self.cards: - path_to_remove = None - if 'pythia_card.dat' in self.cards: - path_to_remove = self.paths['pythia'] - card_to_remove = 'pythia_card.dat' - elif 'pythia8_card.dat' in self.cards: - path_to_remove = self.paths['pythia8'] - card_to_remove = 'pythia8_card.dat' - if path_to_remove: - if 'partonshower' in self.run_card['bypass_check']: + if 'pythia_card.dat' in self.cards or 'pythia8_card.dat' in self.cards: + path_to_remove = None + if 'pythia_card.dat' in self.cards: + path_to_remove = self.paths['pythia'] + card_to_remove = 'pythia_card.dat' + elif 'pythia8_card.dat' in self.cards: + path_to_remove = self.paths['pythia8'] + card_to_remove = 'pythia8_card.dat' + if path_to_remove: + if 'partonshower' in self.run_card['bypass_check']: + logger.warning("forcing to keep parton-shower run while possibly not fully consistent... 
please be carefull") + else: + logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.') + os.remove(path_to_remove) + self.cards.remove(card_to_remove) + else: + logger.info('Remember that Parton-Shower are not yet ready for such proton component definition (HW implementation in progress).', '$MG:BOLD' ) + elif (abs(self.run_card['lpp1'])==3 and abs(self.run_card['lpp2'])==3): + if 'pythia8_card.dat' in self.cards: + if self.run_card['pdlabel'] == 'isronlyll': + if 'partonshower' not in self.run_card['bypass_check']: + # force that QED shower is on? + for param in ['TimeShower:QEDshowerByQ', 'TimeShower:QEDshowerByL', 'TimeShower:QEDshowerByGamma', 'SpaceShower:QEDshowerByQ', 'SpaceShower:QEDshowerByL']: + if param not in self.PY8Card or \ + (not self.PY8Card[param] and param.lower() not in self.PY8Card.user_set): + logger.warning('Activating QED shower: setting %s to True', param) + self.PY8Card[param] = True + elif 'partonshower' in self.run_card['bypass_check']: logger.warning("forcing to keep parton-shower run while possibly not fully consistent... please be carefull") - else: + else: logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.') - os.remove(path_to_remove) - self.cards.remove(card_to_remove) - else: - logger.info('Remember that Parton-Shower are not yet ready for such proton component definition (HW implementation in progress).', '$MG:BOLD' ) - elif (abs(self.run_card['lpp1'])==3 and abs(self.run_card['lpp2'])==3): - if 'pythia8_card.dat' in self.cards: - if self.run_card['pdlabel'] == 'isronlyll': - if 'partonshower' not in self.run_card['bypass_check']: - # force that QED shower is on? - for param in ['TimeShower:QEDshowerByQ', 'TimeShower:QEDshowerByL', 'TimeShower:QEDshowerByGamma', 'SpaceShower:QEDshowerByQ', 'SpaceShower:QEDshowerByL']: - if param not in self.PY8Card or \ - (not self.PY8Card[param] and param.lower() not in self.PY8Card.user_set): - logger.warning('Activating QED shower: setting %s to True', param) - self.PY8Card[param] = True - elif 'partonshower' in self.run_card['bypass_check']: - logger.warning("forcing to keep parton-shower run while possibly not fully consistent... please be carefull") - else: - logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.') - os.remove(self.paths['pythia8']) - self.cards.remove('pythia8_card.dat') + os.remove(self.paths['pythia8']) + self.cards.remove('pythia8_card.dat') ######################################################################## @@ -6514,7 +6614,8 @@ def check_card_consistency(self): #check relation between lepton PDF // dressed lepton collisions // ... 
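
# Several consistency checks in this region now guard on the *type* of
# lpp1/lpp2: with run-card scans enabled these entries can temporarily hold a
# 'scan:' string rather than an integer. Schematically (a sketch, not the
# exact code of check_card_consistency):

def lpp_resolved(run_card):
    # True only once both beam types are resolved to integers
    return (isinstance(run_card['lpp1'], int)
            and isinstance(run_card['lpp2'], int))

def beams_are_protons(run_card):
    # apply the numeric beam check only after scan placeholders are resolved
    return lpp_resolved(run_card) and \
           abs(run_card['lpp1']) == 1 == abs(run_card['lpp2'])
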
- if abs(self.run_card['lpp1']) != 1 or abs(self.run_card['lpp2']) != 1: + if isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int) and \ + abs(self.run_card['lpp1']) != 1 or abs(self.run_card['lpp2']) != 1: if abs(self.run_card['lpp1']) == abs(self.run_card['lpp2']) == 3: # this can be dressed lepton or photon-flux if proc_charac['pdg_initial1'] in [[11],[-11]] and proc_charac['pdg_initial2'] in [[11],[-11]]: @@ -6732,7 +6833,11 @@ def write_card_param(self): """ write the param_card """ self.param_card.write(self.paths['param']) - + + def write_card_fo_card(self): + """ write the fo_card""" + self.fo_card.write_card_from_template(self.paths['FO_analyse'], self.paths['FO_analyse_default']) + @staticmethod def update_dependent(mecmd, me_dir, param_card, path ,timer=0, run_card=None, lhapdfconfig=None): @@ -7076,7 +7181,7 @@ def do_decay(self, line): #first find the particle particle = line.split('>')[0].strip() logger.info("change madspin_card to define the decay of %s: %s" %(particle, line.strip()), '$MG:BOLD') - particle = particle.replace('+','\+').replace('-','\-') + particle = particle.replace('+',r'\+').replace('-',r'\-') decay_pattern = re.compile(r"^\s*decay\s+%s\s*>[\s\w+-~]*?$" % particle, re.I+re.M) text= open(path).read() text = decay_pattern.sub('', text) @@ -7193,7 +7298,7 @@ def help_edit(self, prefix=True): logger.info( ' --clean remove all previously existing line in the file') logger.info( ' --comment_line="" comment all lines matching the regular expression') logger.info('') - logger.info(' Note: all regular-expression will be prefixed by ^\s*') + logger.info(r' Note: all regular-expression will be prefixed by ^\s*') logger.info('') logger.info( ' example: edit reweight --after_line="change mode\b" change model heft') logger.info( ' edit madspin --after_line="banner" change model XXXX') @@ -7314,7 +7419,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern=r'''replace_line=(?P["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[14:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[14:-1] for posline,l in enumerate(split): if re.search(pattern, l): break @@ -7344,7 +7449,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern=r'''comment_line=(?P["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[14:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[14:-1] nb_mod = 0 for posline,l in enumerate(split): if re.search(pattern, l): @@ -7366,7 +7471,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern=r'''before_line=(?P["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[13:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[13:-1] for posline,l in enumerate(split): if re.search(pattern, l): break @@ -7383,7 +7488,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern = r'''after_line=(?P["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[12:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[12:-1] for posline,l in enumerate(split): if re.search(pattern, l): break @@ -7527,16 +7632,19 @@ def open_file(self, answer): answer = 'plot' else: answer = self.cards[int(answer)-self.integer_bias] - + path = '' if 'madweight' in answer: answer = answer.replace('madweight', 'MadWeight') elif 
'MadLoopParams' in answer: answer = self.paths['ML'] elif 'pythia8_card' in answer: answer = self.paths['pythia8'] + elif 'FO_analyse' in answer: + path = self.paths['FO_analyse'] + answer = 'fo_card' if os.path.exists(answer): path = answer - else: + elif not os.path.exists(path): if not '.dat' in answer and not '.lhco' in answer: if answer != 'trigger': path = self.paths[answer] @@ -7595,7 +7703,8 @@ def reload_card(self, path): logger.error('Please re-open the file and fix the problem.') logger.warning('using the \'set\' command without opening the file will discard all your manual change') elif path == self.paths['run']: - self.run_card = banner_mod.RunCard(path) + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + self.run_card = banner_mod.RunCard(path) elif path == self.paths['shower']: self.shower_card = shower_card_mod.ShowerCard(path) elif path == self.paths['ML']: @@ -7614,6 +7723,8 @@ def reload_card(self, path): except: import internal.madweight.Cards as mwcards self.mw_card = mwcards.Card(path) + elif path == self.paths['FO_analyse']: + self.fo_card = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse']) else: logger.debug('not keep in sync: %s', path) return path @@ -7629,6 +7740,9 @@ def scanparamcardhandling(input_path=lambda obj: pjoin(obj.me_dir, 'Cards', 'par iteratorclass=param_card_mod.ParamCardIterator, summaryorder=lambda obj: lambda:None, check_card=lambda obj: CommonRunCmd.static_check_param_card, + run_card_scan=False, + run_card_input= lambda obj: pjoin(obj.me_dir, 'Cards', 'run_card.dat'), + run_card_iteratorclass=banner_mod.RunCardIterator, ): """ This is a decorator for customizing/using scan over the param_card (or technically other) This should be use like this: @@ -7678,7 +7792,60 @@ def __enter__(self): def __exit__(self, ctype, value, traceback ): self.iterator.write(self.path) - def decorator(original_fct): + def scan_over_run_card(original_fct, obj, *args, **opts): + + if isinstance(input_path, str): + card_path = run_card_input + else: + card_path = run_card_input(obj) + + run_card_iterator = run_card_iteratorclass(card_path) + orig_card = copy.deepcopy(run_card_iterator.run_card) + if not run_card_iterator.run_card.scan_set: + return original_fct(obj, *args, **opts) + + + with restore_iterator(orig_card, card_path): + # this with statement ensure that the original card is restore + # whatever happens inside those block + + if not hasattr(obj, 'allow_notification_center'): + obj.allow_notification_center = False + with misc.TMP_variable(obj, 'allow_notification_center', False): + orig_name = get_run_name(obj) + if not orig_name and args[1]: + orig_name = args[1][0] + args = (args[0], args[1][1:]) + #orig_name = "scan_%s" % len(obj.results) + + try: + os.mkdir(pjoin(obj.me_dir, 'Events', orig_name)) + except Exception: + pass + next_name = orig_name + "_00" + + for i,card in enumerate(run_card_iterator): + card.write(card_path) + # still have to check for the auto-wdith + #if i !=0: + next_name = run_card_iterator.get_next_name(next_name) + set_run_name(obj)(next_name) + try: + original_fct(obj, *args, **opts) + except ignoreerror as error: + run_card_iterator.store_entry(next_name, {'exception': error}) + else: + run_card_iterator.store_entry(next_name, store_for_scan(obj)(), run_card_path=card_path) + + #param_card_iterator.write(card_path) #-> this is done by the with statement + name = misc.get_scan_name(orig_name, next_name) + path = result_path(obj) % name + logger.info("write scan results in %s" % path ,'$MG:BOLD') + order = 
summaryorder(obj)() + run_card_iterator.write_summary(path, order=order) + + + def decorator(original_fct): def new_fct(obj, *args, **opts): if isinstance(input_path, str): @@ -7702,8 +7869,13 @@ def new_fct(obj, *args, **opts): if not param_card_iterator: #first run of the function - original_fct(obj, *args, **opts) - return + if run_card_scan: + scan_over_run_card(original_fct, obj, *args, **opts) + return + else: + #first run of the function + original_fct(obj, *args, **opts) + return with restore_iterator(param_card_iterator, card_path): # this with statement ensure that the original card is restore diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/extended_cmd.py b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/extended_cmd.py index 2f37070580..789976beee 100755 --- a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/extended_cmd.py +++ b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/extended_cmd.py @@ -624,12 +624,12 @@ def complete(self, text, state): compfunc = self.completenames # correct wrong splittion with '\ ' - if line and begidx > 2 and line[begidx-2:begidx] == '\ ': + if line and begidx > 2 and line[begidx-2:begidx] == r'\ ': Ntext = line.split(os.path.sep)[-1] - self.completion_prefix = Ntext.rsplit('\ ', 1)[0] + '\ ' + self.completion_prefix = Ntext.rsplit(r'\ ', 1)[0] + r'\ ' to_rm = len(self.completion_prefix) - 1 Nbegidx = len(line.rsplit(os.path.sep, 1)[0]) + 1 - data = compfunc(Ntext.replace('\ ', ' '), line, Nbegidx, endidx) + data = compfunc(Ntext.replace(r'\ ', ' '), line, Nbegidx, endidx) self.completion_matches = [p[to_rm:] for p in data if len(p)>to_rm] # correct wrong splitting with '-'/"=" @@ -742,7 +742,7 @@ def path_completion(text, base_dir = None, only_dirs = False, completion += [prefix + f for f in ['.'+os.path.sep, '..'+os.path.sep] if \ f.startswith(text) and not prefix.startswith('.')] - completion = [a.replace(' ','\ ') for a in completion] + completion = [a.replace(' ',r'\ ') for a in completion] return completion @@ -1253,7 +1253,7 @@ def check_answer_in_input_file(self, question_instance, default, path=False, lin return possibility[0] if '=' in line and ' ' in line.strip(): leninit = len(line) - line,n = re.subn('\s*=\s*','=', line) + line,n = re.subn(r'\s*=\s*','=', line) if n and len(line) != leninit: return self.check_answer_in_input_file(question_instance, default, path=path, line=line) @@ -1311,7 +1311,7 @@ def nice_error_handling(self, error, line): if os.path.exists(self.debug_output): os.remove(self.debug_output) try: - super(Cmd,self).onecmd('history %s' % self.debug_output.replace(' ', '\ ')) + super(Cmd,self).onecmd('history %s' % self.debug_output.replace(' ', r'\ ')) except Exception as error: logger.error(error) @@ -2001,6 +2001,7 @@ def write_configuration(self, filepath, basefile, basedir, to_keep): text = "" has_mg5_path = False # Use local configuration => Need to update the path + already_written = set() for line in open(basefile): if '=' in line: data, value = line.split('=',1) @@ -2018,9 +2019,12 @@ def write_configuration(self, filepath, basefile, basedir, to_keep): comment = '' if key in to_keep: value = str(to_keep[key]) - else: + elif line not in already_written: + already_written.add(line) text += line continue + else: + continue if key == 'mg5_path': has_mg5_path = True try: @@ -2032,14 +2036,20 @@ def write_configuration(self, filepath, basefile, basedir, to_keep): # check if absolute path if not os.path.isabs(value): value = os.path.realpath(os.path.join(basedir, value)) - text += '%s = %s # %s \n' % (key, value, 
comment) + new_line = '%s = %s # %s \n' % (key, value, comment) + if new_line not in already_written: + text += new_line + already_written.add(new_line) for key in to_write: if key in to_keep: - text += '%s = %s \n' % (key, to_keep[key]) + new_line = '%s = %s \n' % (key, to_keep[key]) + if new_line not in already_written: + text += new_line if not MADEVENT and not has_mg5_path: - text += """\n# MG5 MAIN DIRECTORY\n""" - text += "mg5_path = %s\n" % MG5DIR + if "mg5_path = %s\n" % MG5DIR not in already_written: + text += """\n# MG5 MAIN DIRECTORY\n""" + text += "mg5_path = %s\n" % MG5DIR writer = open(filepath,'w') writer.write(text) @@ -2190,7 +2200,7 @@ def onecmd(self, line, **opt): raise def reask(self, reprint_opt=True): - pat = re.compile('\[(\d*)s to answer\]') + pat = re.compile(r'\[(\d*)s to answer\]') prev_timer = signal.alarm(0) # avoid timer if any if prev_timer: @@ -2991,7 +3001,7 @@ def question_formatting(self, nb_col = 80, lpotential_switch=0, lnb_key=0, key=None): - """should return four lines: + r"""should return four lines: 1. The upper band (typically /========\ 2. The lower band (typically \========/ 3. The line without conflict | %(nb)2d. %(descrip)-20s %(name)5s = %(switch)-10s | @@ -3239,13 +3249,13 @@ def create_question(self, help_text=True): data_to_format['conflict_switch'] = self.color_for_value(key,self.inconsistent_keys[key], consistency=False) if hidden_line: - f2 = re.sub('%(\((?:name|descrip|add_info)\)-?)(\d+)s', + f2 = re.sub(r'%(\((?:name|descrip|add_info)\)-?)(\d+)s', lambda x: '%%%s%ds' % (x.group(1),int(x.group(2))+9), f2) text.append(f2 % data_to_format) elif hidden_line: if not f3: - f3 = re.sub('%(\((?:name|descrip|add_info)\)-?)(\d+)s', + f3 = re.sub(r'%(\((?:name|descrip|add_info)\)-?)(\d+)s', lambda x: '%%%s%ds' % (x.group(1),int(x.group(2))+9), f1) text.append(f3 % data_to_format) diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/file_writers.py b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/file_writers.py index 41bff05276..526756129f 100755 --- a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/file_writers.py +++ b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/file_writers.py @@ -36,10 +36,10 @@ class FileWriter(io.FileIO): supported_preprocessor_commands = ['if'] preprocessor_command_re=re.compile( - "\s*(?P%s)\s*\(\s*(?P.*)\s*\)\s*{\s*"\ + r"\s*(?P%s)\s*\(\s*(?P.*)\s*\)\s*{\s*"\ %('|'.join(supported_preprocessor_commands))) preprocessor_endif_re=re.compile(\ - "\s*}\s*(?Pelse)?\s*(\((?P.*)\))?\s*(?P{)?\s*") + r"\s*}\s*(?Pelse)?\s*(\((?P.*)\))?\s*(?P{)?\s*") class FileWriterError(IOError): """Exception raised if an error occurs in the definition @@ -191,15 +191,15 @@ class FortranWriterError(FileWriter.FileWriterError): pass # Parameters defining the output of the Fortran writer - keyword_pairs = {'^if.+then\s*$': ('^endif', 2), - '^type(?!\s*\()\s*.+\s*$': ('^endtype', 2), - '^do(?!\s+\d+)\s+': ('^enddo\s*$', 2), - '^subroutine': ('^end\s*$', 0), - '^module': ('^end\s*$', 0), - 'function': ('^end\s*$', 0)} - single_indents = {'^else\s*$':-2, - '^else\s*if.+then\s*$':-2} - number_re = re.compile('^(?P\d+)\s+(?P.*)') + keyword_pairs = {r'^if.+then\s*$': ('^endif', 2), + r'^type(?!\s*\()\s*.+\s*$': ('^endtype', 2), + r'^do(?!\s+\d+)\s+': (r'^enddo\s*$', 2), + '^subroutine': (r'^end\s*$', 0), + '^module': (r'^end\s*$', 0), + 'function': (r'^end\s*$', 0)} + single_indents = {r'^else\s*$':-2, + r'^else\s*if.+then\s*$':-2} + number_re = re.compile(r'^(?P\d+)\s+(?P.*)') line_cont_char = '$' comment_char = 'c' uniformcase = True #force 
everyting to be lower/upper case @@ -212,7 +212,7 @@ class FortranWriterError(FileWriter.FileWriterError): # Private variables __indent = 0 __keyword_list = [] - __comment_pattern = re.compile(r"^(\s*#|c$|(c\s+([^=]|$))|cf2py|c\-\-|c\*\*|!)", re.IGNORECASE) + __comment_pattern = re.compile(r"^(\s*#|c\$|c$|(c\s+([^=]|$))|cf2py|c\-\-|c\*\*|\s*!|!\$)", re.IGNORECASE) __continuation_line = re.compile(r"(?: )[$&]") def write_line(self, line): @@ -424,26 +424,20 @@ def count_number_of_quotes(self, line): i = i + 1 return len(splitline)-1 - def remove_routine(self, text, fct_names, formatting=True): - """write the incoming text but fully removing the associate routine/function - text can be a path to a file, an iterator, a string - fct_names should be a list of functions to remove + @staticmethod + def get_routine(text, fct_names, call_back=None): + """ + get the fortran function from a fortran file """ - f77_type = ['real*8', 'integer', 'double precision', 'logical'] - pattern = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ + pattern = re.compile(r'^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ % {'type':'|'.join(f77_type)}, re.I) - + + if isinstance(text, str): + text = text.split('\n') + + to_write=False removed = [] - if isinstance(text, str): - if '\n' in text: - text = text.split('\n') - else: - text = open(text) - if isinstance(fct_names, str): - fct_names = [fct_names] - - to_write=True for line in text: fct = pattern.findall(line) if fct: @@ -451,22 +445,38 @@ def remove_routine(self, text, fct_names, formatting=True): to_write = False else: to_write = True - if to_write: - if formatting: - if line.endswith('\n'): - line = line[:-1] - self.writelines(line) - else: - if not line.endswith('\n'): - line = '%s\n' % line - super(FileWriter,self).writelines(line) + if call_back: + call_back(line) else: removed.append(line) - + return removed + + def remove_routine(self, text, fct_names, formatting=True): + """write the incoming text but fully removing the associate routine/function + text can be a path to a file, an iterator, a string + fct_names should be a list of functions to remove + """ + + def call_back(line): + if formatting: + if line.endswith('\n'): + line = line[:-1] + self.writelines(line) + else: + if not line.endswith('\n'): + line = '%s\n' % line + super(FileWriter,self).writelines(line) + + return self.get_routine(text, fct_names, call_back) + +class FortranWriter90(FortranWriter): + + comment_char = ' !' 
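
# The file_writers refactor above splits routine removal into a reusable
# scanner plus an optional callback. Roughly, with simplified signatures
# (a sketch, not the verbatim class method; note the sketch re-declares the
# Fortran type list that the pattern interpolates):

import re

F77_TYPES = ['real*8', 'integer', 'double precision', 'logical']
ROUTINE_START = re.compile(
    r'^\s+(?:SUBROUTINE|(?:%s)\s+function)\s+([a-zA-Z]\w*)'
    % '|'.join(F77_TYPES), re.I)

def get_routine_sketch(text, fct_names, call_back=None):
    # one pass over the Fortran source: lines inside the named routines are
    # collected and returned, every other line is handed to the callback
    if isinstance(text, str):
        text = text.split('\n')
    removed, to_write = [], False
    for line in text:
        match = ROUTINE_START.findall(line)
        if match:
            to_write = match[0] not in fct_names
        if to_write:
            if call_back:
                call_back(line)
        else:
            removed.append(line)
    return removed

# remove_routine then reduces to get_routine with a callback that writes the
# surviving lines through the formatter, which is the shape of the hunk above.
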
+ #=============================================================================== # CPPWriter #=============================================================================== @@ -497,50 +507,50 @@ class CPPWriterError(FileWriter.FileWriterError): '^private': standard_indent, '^protected': standard_indent} - spacing_patterns = [ - ('\s*\"\s*}', '\"'), - ('\s*,\s*', ', '), - ('\s*-\s*', ' - '), - ('([{(,=])\s*-\s*', '\g<1> -'), - ('(return)\s*-\s*', '\g<1> -'), - ('\s*\+\s*', ' + '), - ('([{(,=])\s*\+\s*', '\g<1> +'), - ('\(\s*', '('), - ('\s*\)', ')'), - ('\{\s*', '{'), - ('\s*\}', '}'), - ('\s*=\s*', ' = '), - ('\s*>\s*', ' > '), - ('\s*<\s*', ' < '), - ('\s*!\s*', ' !'), - ('\s*/\s*', '/'), - ('(?\s+>\s*', ' >> '), - ('<\s*double\s*>>\s*', ' > '), - ('\s*<\s+<\s*', ' << '), - ('\s*-\s+>\s*', '->'), - ('\s*=\s+=\s*', ' == '), - ('\s*!\s+=\s*', ' != '), - ('\s*>\s+=\s*', ' >= '), - ('\s*<\s+=\s*', ' <= '), - ('\s*&&\s*', ' && '), - ('\s*\|\|\s*', ' || '), - ('\s*{\s*}', ' {}'), - ('\s*;\s*', '; '), - (';\s*\}', ';}'), - (';\s*$}', ';'), - ('\s*<\s*([a-zA-Z0-9]+?)\s*>', '<\g<1>>'), - ('^#include\s*<\s*(.*?)\s*>', '#include <\g<1>>'), - ('(\d+\.{0,1}\d*|\.\d+)\s*[eE]\s*([+-]{0,1})\s*(\d+)', - '\g<1>e\g<2>\g<3>'), - ('\s+',' '), - ('^\s*#','#')] + spacing_patterns = [(r'\s*\"\s*}', '\"'), + (r'\s*,\s*', ', '), + (r'\s*-\s*', ' - '), + (r'([{(,=])\s*-\s*', r'\g<1> -'), + (r'(return)\s*-\s*', r'\g<1> -'), + (r'\s*\+\s*', ' + '), + (r'([{(,=])\s*\+\s*', r'\g<1> +'), + (r'\(\s*', '('), + (r'\s*\)', ')'), + (r'\{\s*', '{'), + (r'\s*\}', '}'), + (r'\s*=\s*', ' = '), + (r'\s*>\s*', ' > '), + (r'\s*<\s*', ' < '), + (r'\s*!\s*', ' !'), + (r'\s*/\s*', '/'), + (r'\s*\*\s*', ' * '), + (r'\s*-\s+-\s*', '-- '), + (r'\s*\+\s+\+\s*', '++ '), + (r'\s*-\s+=\s*', ' -= '), + (r'\s*\+\s+=\s*', ' += '), + (r'\s*\*\s+=\s*', ' *= '), + (r'\s*/=\s*', ' /= '), + (r'\s*>\s+>\s*', ' >> '), + (r'<\s*double\s*>>\s*', ' > '), + (r'\s*<\s+<\s*', ' << '), + (r'\s*-\s+>\s*', '->'), + (r'\s*=\s+=\s*', ' == '), + (r'\s*!\s+=\s*', ' != '), + (r'\s*>\s+=\s*', ' >= '), + (r'\s*<\s+=\s*', ' <= '), + (r'\s*&&\s*', ' && '), + (r'\s*\|\|\s*', ' || '), + (r'\s*{\s*}', ' {}'), + (r'\s*;\s*', '; '), + (r';\s*\}', ';}'), + (r';\s*$}', ';'), + (r'\s*<\s*([a-zA-Z0-9]+?)\s*>', r'<\g<1>>'), + (r'^#include\s*<\s*(.*?)\s*>', r'#include <\g<1>>'), + (r'(\d+\.{0,1}\d*|\.\d+)\s*[eE]\s*([+-]{0,1})\s*(\d+)', + r'\g<1>e\g<2>\g<3>'), + (r'\s+',' '), + (r'^\s*#','#')] + spacing_re = dict([(key[0], re.compile(key[0])) for key in \ spacing_patterns]) diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/gen_crossxhtml.py b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/gen_crossxhtml.py index d58ec573bc..681bf9d09b 100755 --- a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/gen_crossxhtml.py +++ b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/gen_crossxhtml.py @@ -648,7 +648,7 @@ def recreate(self, banner): if run_card['ickkw'] != 0: #parse the file to have back the information pythia_log = misc.BackRead(pjoin(path, '%s_pythia.log' % tag)) - pythiare = re.compile("\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") + pythiare = re.compile(r"\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") for line in pythia_log: info = pythiare.search(line) if not info: @@ -1623,7 +1623,7 @@ def get_html(self, runresults): elif self.debug: text = str(self.debug).replace('. ','.
') if 'http' in text: - pat = re.compile('(http[\S]*)') + pat = re.compile(r'(http[\S]*)') text = pat.sub(r'<a href="\1" target=_blank> here </a>', text) debug = '<br> %s<br> %s<br>
' % \ (self.debug.__class__.__name__, text) diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/gen_ximprove.py b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/gen_ximprove.py index c86da36a05..415ecc9de0 100755 --- a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/gen_ximprove.py +++ b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/gen_ximprove.py @@ -158,8 +158,11 @@ def get_helicity(self, to_submit=True, clean=True): (stdout, _) = p.communicate(''.encode()) stdout = stdout.decode('ascii',errors='ignore') if stdout: - nb_channel = max([math.floor(float(d)) for d in stdout.split()]) + lines = stdout.strip().split('\n') + nb_channel = max([math.floor(float(d)) for d in lines[-1].split()]) else: + if os.path.exists(pjoin(self.me_dir, 'error')): + os.remove(pjoin(self.me_dir, 'error')) for matrix_file in misc.glob('matrix*orig.f', Pdir): files.cp(matrix_file, matrix_file.replace('orig','optim')) P_zero_result.append(Pdir) @@ -297,12 +300,14 @@ def get_helicity(self, to_submit=True, clean=True): bad_amps_perhel = [] if __debug__: mtext = open(matrix_file).read() - nb_amp = int(re.findall('PARAMETER \(NGRAPHS=(\d+)\)', mtext)[0]) - logger.debug('nb_hel: %s zero amp: %s bad_amps_hel: %s/%s', len(good_hels),len(bad_amps),len(bad_amps_perhel), len(good_hels)*nb_amp ) + nb_amp = int(re.findall(r'PARAMETER \(NGRAPHS=(\d+)\)', mtext)[0]) + logger.debug('(%s) nb_hel: %s zero amp: %s bad_amps_hel: %s/%s', split_file[-1], len(good_hels),len(bad_amps),len(bad_amps_perhel), len(good_hels)*nb_amp ) if len(good_hels) == 1: files.cp(matrix_file, matrix_file.replace('orig','optim')) continue # avoid optimization if onlye one helicity - recycler = hel_recycle.HelicityRecycler(good_hels, bad_amps, bad_amps_perhel) + + gauge = self.cmd.proc_characteristics['gauge'] + recycler = hel_recycle.HelicityRecycler(good_hels, bad_amps, bad_amps_perhel, gauge=gauge) # In case of bugs you can play around with these: recycler.hel_filt = self.run_card['hel_filtering'] recycler.amp_splt = self.run_card['hel_splitamp'] @@ -1299,7 +1304,7 @@ def get_job_for_event(self): 'script_name': 'unknown', 'directory': C.name, # need to be change for splitted job 'P_dir': C.parent_name, - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # needed for RO gridpack 'offset': 1, # need to be change for splitted job 'nevents': nevents, 'maxiter': self.max_iter, @@ -1387,7 +1392,7 @@ def create_ajob(self, template, jobs, write_dir=None): break info = jobs[j] info['script_name'] = 'ajob%i' % script_number - info['keeplog'] = 'false' + info['keeplog'] = 'false' if self.run_card['keep_log'] != 'debug' else 'true' if "base_directory" not in info: info["base_directory"] = "./" fsock.write(template_text % info) @@ -1456,7 +1461,7 @@ def get_job_for_precision(self): 'script_name': 'unknown', 'directory': C.name, # need to be change for splitted job 'P_dir': C.parent_name, - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # used for RO gridpack 'offset': 1, # need to be change for splitted job 'nevents': nevents, 'maxiter': self.max_iter, @@ -1916,7 +1921,7 @@ def get_job_for_event(self): 'directory': C.name, # need to be change for splitted job 'P_dir': os.path.basename(C.parent_name), 'offset': 1, # need to be change for splitted job - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # use for RO 
gridpack 'nevents': nevents, #int(nevents*self.gen_events_security)+1, 'maxiter': self.max_iter, 'miniter': self.min_iter, diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/hel_recycle.py b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/hel_recycle.py index dfa45d5d20..6c86611f68 100755 --- a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/hel_recycle.py +++ b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/hel_recycle.py @@ -383,7 +383,7 @@ def get_number(cls, *args): class HelicityRecycler(): '''Class for recycling helicity''' - def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[]): + def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[], gauge='U'): External.good_hel = [] External.nhel_lines = '' @@ -427,6 +427,7 @@ def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[]): self.all_hel = [] self.hel_filt = True + self.gauge = gauge def set_input(self, file): if 'born_matrix' in file: @@ -612,7 +613,7 @@ def unfold_helicities(self, line, nature): def apply_amps(self, line, new_objs): if self.amp_splt: - return split_amps(line, new_objs) + return split_amps(line, new_objs, gauge=self.gauge) else: return apply_args(line, [i.args for i in new_objs]) @@ -785,7 +786,7 @@ def apply_args(old_line, all_the_args): return ''.join(new_lines) -def split_amps(line, new_amps): +def split_amps(line, new_amps, gauge): if not new_amps: return '' fct = line.split('(',1)[0].split('_0')[0] @@ -841,34 +842,31 @@ def split_amps(line, new_amps): spin = fct.split(None,1)[1][to_remove] lines.append('%sP1N_%s(%s)' % (fct, to_remove+1, ', '.join(args))) - hel, iamp = re.findall('AMP\((\d+),(\d+)\)', amp_result)[0] + hel, iamp = re.findall(r'AMP\((\d+),(\d+)\)', amp_result)[0] hel_calculated.append(hel) #lines.append(' %(result)s = TMP(3) * W(3,%(w)s) + TMP(4) * W(4,%(w)s)+' # % {'result': amp_result, 'w': windex}) #lines.append(' & TMP(5) * W(5,%(w)s)+TMP(6) * W(6,%(w)s)' # % {'result': amp_result, 'w': windex}) - if spin in "VF": - lines.append(""" call CombineAmp(%(nb)i, - & (/%(hel_list)s/), - & (/%(w_list)s/), - & TMP, W, AMP(1,%(iamp)s))""" % - {'nb': len(sub_amps), - 'hel_list': ','.join(hel_calculated), - 'w_list': ','.join(windices), - 'iamp': iamp - }) + if spin == "F" or ( spin == "V" and gauge !='FD'): + suffix = '' elif spin == "S": - lines.append(""" call CombineAmpS(%(nb)i, - &(/%(hel_list)s/), - & (/%(w_list)s/), - & TMP, W, AMP(1,%(iamp)s))""" % - {'nb': len(sub_amps), - 'hel_list': ','.join(hel_calculated), - 'w_list': ','.join(windices), - 'iamp': iamp - }) + suffix = 'S' + elif spin == "V" and gauge == "FD": + suffix = "FD" else: - raise Exception("split amp are not supported for spin2 and 3/2") + raise Exception("split amp not supported for spin2, 3/2") + + lines.append(""" call CombineAmp%(suffix)s(%(nb)i, + & (/%(hel_list)s/), + & (/%(w_list)s/), + & TMP, W, AMP(1,%(iamp)s))""" % {'suffix':suffix, + 'nb': len(sub_amps), + 'hel_list': ','.join(hel_calculated), + 'w_list': ','.join(windices), + 'iamp': iamp + }) + #lines.append('') return '\n'.join(lines) diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/histograms.py b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/histograms.py index 98f22c4c3a..9931127f66 100755 --- a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/histograms.py +++ b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/histograms.py @@ -632,34 +632,34 @@ class HwU(Histogram): # than necessary because the HwU standard allows for spaces from within # the name of a weight weight_header_re = re.compile( - 
'&\s*(?P(\S|(\s(?!\s*(&|$))))+)(\s(?!(&|$)))*') + r'&\s*(?P(\S|(\s(?!\s*(&|$))))+)(\s(?!(&|$)))*') # ================================ # Histo weight specification RE's # ================================ # The start of a plot - histo_start_re = re.compile('^\s*\s*(?P\d+)\s*"\s*'+ - '(?P(\S|(\s(?!\s*")))+)\s*"\s*$') + histo_start_re = re.compile(r'^\s*\s*(?P\d+)\s*"\s*'+ + r'(?P(\S|(\s(?!\s*")))+)\s*"\s*$') # A given weight specifier - a_float_re = '[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' - histo_bin_weight_re = re.compile('(?P%s|NaN)'%a_float_re,re.IGNORECASE) - a_int_re = '[\+|-]?\d+' + a_float_re = r'[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' + histo_bin_weight_re = re.compile(r'(?P%s|NaN)'%a_float_re,re.IGNORECASE) + a_int_re = r'[\+|-]?\d+' # The end of a plot histo_end_re = re.compile(r'^\s*<\\histogram>\s*$') # A scale type of weight - weight_label_scale = re.compile('^\s*mur\s*=\s*(?P%s)'%a_float_re+\ - '\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) - weight_label_PDF = re.compile('^\s*PDF\s*=\s*(?P\d+)\s*$') - weight_label_PDF_XML = re.compile('^\s*pdfset\s*=\s*(?P\d+)\s*$') - weight_label_TMS = re.compile('^\s*TMS\s*=\s*(?P%s)\s*$'%a_float_re) - weight_label_alpsfact = re.compile('^\s*alpsfact\s*=\s*(?P%s)\s*$'%a_float_re, + weight_label_scale = re.compile(r'^\s*mur\s*=\s*(?P%s)'%a_float_re+\ + r'\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) + weight_label_PDF = re.compile(r'^\s*PDF\s*=\s*(?P\d+)\s*$') + weight_label_PDF_XML = re.compile(r'^\s*pdfset\s*=\s*(?P\d+)\s*$') + weight_label_TMS = re.compile(r'^\s*TMS\s*=\s*(?P%s)\s*$'%a_float_re) + weight_label_alpsfact = re.compile(r'^\s*alpsfact\s*=\s*(?P%s)\s*$'%a_float_re, re.IGNORECASE) - weight_label_scale_adv = re.compile('^\s*dyn\s*=\s*(?P%s)'%a_int_re+\ - '\s*mur\s*=\s*(?P%s)'%a_float_re+\ - '\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) - weight_label_PDF_adv = re.compile('^\s*PDF\s*=\s*(?P\d+)\s+(?P\S+)\s*$') + weight_label_scale_adv = re.compile(r'^\s*dyn\s*=\s*(?P%s)'%a_int_re+\ + r'\s*mur\s*=\s*(?P%s)'%a_float_re+\ + r'\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) + weight_label_PDF_adv = re.compile(r'^\s*PDF\s*=\s*(?P\d+)\s+(?P\S+)\s*$') class ParseError(MadGraph5Error): @@ -926,7 +926,7 @@ def get_HwU_source(self, print_header=True): res.append(' '.join('%+16.7e'%wgt for wgt in list(bin.boundaries))) res[-1] += ' '.join('%+16.7e'%bin.wgts[key] for key in self.bins.weight_labels if key not in ['central','stat_error']) - res.append('<\histogram>') + res.append(r'<\histogram>') return res def output(self, path=None, format='HwU', print_header=True): @@ -1149,6 +1149,8 @@ def parse_one_histo_from_stream(self, stream, all_weight_header, boundaries = [0.0,0.0] for j, weight in \ enumerate(HwU.histo_bin_weight_re.finditer(line_bin)): + if (j == len(weight_header)): + continue if j == len(all_weight_header): raise HwU.ParseError("There is more bin weights"+\ " specified than expected (%i)"%len(weight_header)) @@ -1803,7 +1805,7 @@ def parse_histos_from_PY8_XML_stream(self, stream, run_id=None, # Filter empty weights coming from the split weight_label_list = [wgt.strip() for wgt in str(selected_run_node.getAttribute('header')).split(';') if - not re.match('^\s*$',wgt)] + not re.match(r'^\s*$',wgt)] ordered_weight_label_list = [w for w in weight_label_list if w not\ in ['xmin','xmax']] # Remove potential repetition of identical weight labels @@ -1827,7 +1829,7 @@ def parse_histos_from_PY8_XML_stream(self, stream, run_id=None, all_weights = [] for wgt_position, wgt_label in \ 
enumerate(str(selected_run_node.getAttribute('header')).split(';')): - if not re.match('^\s*$',wgt_label) is None: + if not re.match(r'^\s*$',wgt_label) is None: continue all_weights.append({'POSITION':wgt_position}) for wgt_item in wgt_label.strip().split('_'): @@ -2714,7 +2716,7 @@ def ratio_no_correlations(wgtsA, wgtsB): # First the global gnuplot header for this histogram group global_header =\ -""" +r""" ################################################################################ ### Rendering of the plot titled '%(title)s' ################################################################################ @@ -2862,9 +2864,9 @@ def ratio_no_correlations(wgtsA, wgtsB): major_title = ', '.join(major_title) if not mu[0] in ['none',None]: - major_title += ', dynamical\_scale\_choice=%s'%mu[0] + major_title += r', dynamical\_scale\_choice=%s'%mu[0] if not pdf[0] in ['none',None]: - major_title += ', PDF=%s'%pdf[0].replace('_','\_') + major_title += ', PDF=%s'%pdf[0].replace('_',r'\_') # Do not show uncertainties for individual jet samples (unless first # or specified explicitely and uniquely) @@ -2937,7 +2939,7 @@ def ratio_no_correlations(wgtsA, wgtsB): plot_lines.append( "'%s' index %d using (($1+$2)/2):%d ls %d title '%s'"\ %(HwU_name,block_position+i,mu_var+3,color_index,\ -'%s dynamical\_scale\_choice=%s' % (title,mu[j]))) +r'%s dynamical\_scale\_choice=%s' % (title,mu[j]))) # And now PDF_variation if available if not PDF_var_pos is None: for j,PDF_var in enumerate(PDF_var_pos): @@ -2947,7 +2949,7 @@ def ratio_no_correlations(wgtsA, wgtsB): plot_lines.append( "'%s' index %d using (($1+$2)/2):%d ls %d title '%s'"\ %(HwU_name,block_position+i,PDF_var+3,color_index,\ -'%s PDF=%s' % (title,pdf[j].replace('_','\_')))) +'%s PDF=%s' % (title,pdf[j].replace('_',r'\_')))) # Now add the uncertainty lines, those not using a band so that they # are not covered by those using a band after we reverse plo_lines diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/launch_plugin.py index 7b10bedcef..9ec09eb71d 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/launch_plugin.py @@ -98,6 +98,7 @@ def default_setup(self): self['vector_size'] = 16 # already setup in default class (just change value) self['aloha_flag'] = '--fast-math' self['matrix_flag'] = '-O3' + self['limhel'] = 0 self.display_block.append('simd') self.display_block.append('psoptim') diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/lhe_parser.py b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/lhe_parser.py index eee9ba4522..f6e47956cd 100755 --- a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/lhe_parser.py +++ b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/lhe_parser.py @@ -7,6 +7,7 @@ import numbers import math import time +import copy import os import shutil import sys @@ -101,7 +102,7 @@ def __init__(self, line=None, event=None): self.rwgt = 0 return - + self.event = event if event is not None: self.event_id = len(event) #not yet in the event @@ -1066,6 +1067,8 @@ def define_init_banner(self, wgt, lha_strategy, proc_charac=None): #special case for 1>N init_information = run_card.get_banner_init_information() event = next(self) + if not len(event): #if parse-momenta was false we have to parse the first event + event = Event(str(event)) init_information["idbmup1"] = event[0].pdg init_information["ebmup1"] = event[0].mass init_information["idbmup2"] = 0 @@ -1149,6 +1152,7 @@ def 
initialize_unweighting(self, getwgt, trunc_error): nb_keep = max(20, int(nb_event*trunc_error*15)) new_wgt = new_wgt[-nb_keep:] if nb_event == 0: + misc.sprint(i,f) raise Exception # store the information self.initial_nb_events[i] = nb_event @@ -1203,7 +1207,6 @@ def unweight(self, outputpath, get_wgt, **opts): (stop to write event when target is reached) """ - if isinstance(get_wgt, (str,six.text_type)): unwgt_name =get_wgt def get_wgt_multi(event): @@ -1483,12 +1486,12 @@ def reorder_mother_child(self): particle.mother2 -= 1 # re-call the function for the next potential change return self.reorder_mother_child() - - - - - + + + + + def parse_reweight(self): """Parse the re-weight information in order to return a dictionary {key: value}. If no group is define group should be '' """ @@ -1522,6 +1525,37 @@ def parse_nlo_weight(self, real_type=(1,11), threshold=None): threshold=threshold) return self.nloweight + def get_fks_pair(self, real_type=(1,11), threshold=None): + """ Gives the fks pair labels""" + start, stop = self.tag.find(''), self.tag.find('') + if start != -1 != stop: + text = self.tag[start+8:stop] + all_line = text.split('\n') + text = text.lower().replace('d','e') + all_line = text.split('\n') + for line in all_line: + data = line.split() + if len(data)>16: + wgt = OneNLOWeight(line, real_type=real_type) + return wgt.to_merge_pdg,wgt.nexternal + + def get_born_momenta(self,real_type=(1,11), threshold=None): + """ Gets the underlying n+1 body kinematics""" + start, stop = self.tag.find(''), self.tag.find('') + if start != -1 != stop: + text = self.tag[start+8:stop] + text = text.lower().replace('d','e') + all_line = text.split('\n') + for line in all_line: + data = line.split() + if len(data)>16: + wgt = OneNLOWeight(line, real_type=real_type) + nexternal = wgt.nexternal + real_momenta = all_line[2:2+nexternal] + return real_momenta + + + def rewrite_nlo_weight(self, wgt=None): """get the string associate to the weight""" @@ -1558,11 +1592,11 @@ def parse_lo_weight(self): return self.loweight if not hasattr(Event, 'loweight_pattern'): - Event.loweight_pattern = re.compile('''\s*(?P\d+)\s+(?P[\d.e+-]+)\s*\s*\n\s* - \s*(?P[\s\d.+-e]+)\s*\s*\n\s* - 1|2)["']?\>\s*(?P[\s\d.e+-]*)\s*\s*\n\s* - 1|2)["']?\>\s*(?P[\s\d.e+-]*)\s*\s*\n\s* - \s*(?P[\d.e+-]*)\s* + Event.loweight_pattern = re.compile('''\\s*(?P\\d+)\\s+(?P[\\d.e+-]+)\\s*\\s*\n\\s* + \\s*(?P[\\s\\d.+-e]+)\\s*\\s*\n\\s* + 1|2)["']?\\>\\s*(?P[\\s\\d.e+-]*)\\s*\\s*\n\\s* + 1|2)["']?\\>\\s*(?P[\\s\\d.e+-]*)\\s*\\s*\n\\s* + \\s*(?P[\\d.e+-]*)\\s* ''',re.X+re.I+re.M) start, stop = self.tag.find(''), self.tag.find('') @@ -1615,7 +1649,7 @@ def parse_matching_scale(self): self.matched_scale_data = [] - pattern = re.compile("") + pattern = re.compile(r"") data = re.split(pattern,self.tag) if len(data) == 1: return [] @@ -1623,7 +1657,7 @@ def parse_matching_scale(self): tmp = {} start,content, end = data self.tag = "%s%s" % (start, end) - pattern = re.compile("pt_clust_(\d*)=\"([\de+-.]*)\"") + pattern = re.compile("pt_clust_(\\d*)=\"([\\de+-.]*)\"") for id,value in pattern.findall(content): tmp[int(id)] = float(value) for i in range(1, len(self)+1): @@ -1647,7 +1681,7 @@ def parse_syscalc_info(self): return self.syscalc_data pattern = re.compile("|") - pattern2 = re.compile("<(?P[\w]*)(?:\s*(\w*)=[\"'](.*)[\"']\s*|\s*)>(.*)") + pattern2 = re.compile("<(?P[\\w]*)(?:\\s*(\\w*)=[\"'](.*)[\"']\\s*|\\s*)>(.*)") data = re.split(pattern,self.tag) if len(data) == 1: return [] @@ -1850,6 +1884,240 @@ def get_decay(self, pdg_code=0, 
event_id=None): return new_event + + def set_initial_mass_to_zero(self): + """set the masses of the initial particles to zero, by reshuffling the respective momenta + Works only in the **partonic** com frame, so the event must be boosted to such frame + before calling the function + """ + + if not misc.equal(self[0].px, 0) or not misc.equal(self[1].px, 0) or \ + not misc.equal(self[0].py, 0) or not misc.equal(self[1].py, 0) or \ + not misc.equal(self[0].pz, - self[1].pz, zero_limit=False): + misc.sprint(self[0]) + misc.sprint(self[1]) + raise Exception('momenta should be in the partonic center of mass frame') + + self[0].mass = 0. + self[1].mass = 0. + tot_E=0. + for ip,part in enumerate(self): + if part.status == 1 : + tot_E += part.E + if (self[0].pz > 0. and self[1].pz < 0): + self[0].set_momentum(FourMomentum([tot_E/2., 0., 0., tot_E/2.])) + self[1].set_momentum(FourMomentum([tot_E/2., 0., 0., -tot_E/2.])) + elif (self[0].pz < 0. and self[1].pz > 0): + self[0].set_momentum(FourMomentum([tot_E/2., 0., 0., -tot_E/2.])) + self[1].set_momentum(FourMomentum([tot_E/2., 0., 0., tot_E/2.])) + else: + logger.critical('ERROR: two incoming partons not back-to-back') + + def set_final_jet_mass_to_zero(self): + """set the final light particle masses to zero + """ + + for ip,part in enumerate(self): + if ((abs(part.pid) <= 5) or (abs(part.pid) == 11) or (abs(part.pid) == 12)) and (part.status == 1): + part.mass = 0. + E_1_new = math.sqrt(part.mass**2 + part.px**2 + part.py**2 + part.pz**2) + part.set_momentum(FourMomentum([E_1_new, part.px, part.py, part.pz])) + + + + def merge_particles_kinematics(self, i,j, moth): + """Map to an underlying n-body kinematics for two given + particles i,j to be merged and a resulting moth""" + """ note! kinematics (and id) mapping only!
""" + + recoil = True + fks_type = False + + if recoil and not fks_type: + if (i == moth[0].get('number')-1): + fks_i = i + fks_j = j + elif (j == moth[0].get('number')-1): + fks_i = j + fks_j = i + to_remove = fks_j + + merge_i = self[fks_i] + merge_j = self[fks_j] + + i_4mom = FourMomentum(merge_i) + j_4mom = FourMomentum(merge_j) + if (fks_i <= 1): + sign1 = -1.0 + else: + sign1 = 1.0 + mother_4mom = i_4mom + sign1*j_4mom + + new_event = copy.deepcopy(self) + + self[fks_i].pid = moth[0]['id'] + self[fks_i].set_momentum(mother_4mom) + + if fks_i <= 1: # initial-state recoil + new_p = FourMomentum() + for ip,part in enumerate(self): + if (ip != fks_i and ip != fks_j and ip >= 2): + new_p += part + + if fks_i == 0: + self[1].set_momentum(new_p - FourMomentum(self[0])) + elif fks_i == 1: + self[0].set_momentum(new_p - FourMomentum(self[1])) + + pz_1_new = self.recoil_eq(self[0],self[1]) + pz_2_new = self[0].pz + self[1].pz - pz_1_new + E_1_new = math.sqrt(self[0].mass**2 + self[0].px**2 + self[0].py**2 + pz_1_new **2) + E_2_new = math.sqrt(self[1].mass**2 + self[1].px**2 + self[1].py**2 + pz_2_new **2) + self[0].set_momentum(FourMomentum([E_1_new,self[0].px,self[0].py,pz_1_new])) + self[1].set_momentum(FourMomentum([E_2_new,self[1].px,self[1].py,pz_2_new])) + self.pop(to_remove) + + if fks_i > 1: # final-state recoil + + # Re-scale the energy of fks_i to make it on-shell + for ip,part in enumerate(self): + if (ip == fks_i): + part.E = math.sqrt(part.mass**2 + part.px**2 + part.py**2 + part.pz**2) + new_p.E = part.E + + # Find the overall energy in the final state + new_p.E = 0.0 + for ip,part in enumerate(self): + if (ip != fks_j and ip >= 2): + new_p.E += part.E + + # Use one of the initial states to absorb the energy change in the final state + self[1].set_momentum(FourMomentum([new_p.E-self[0].E,self[1].px,self[1].py,self[1].pz])) + + # Change the initial state pz and E + pz_1_new = self.recoil_eq(self[0],self[1]) + pz_2_new = self[0].pz + self[1].pz - pz_1_new + E_1_new = math.sqrt(self[0].mass**2 + self[0].px**2 + self[0].py**2 + pz_1_new **2) + E_2_new = math.sqrt(self[1].mass**2 + self[1].px**2 + self[1].py**2 + pz_2_new **2) + self[0].set_momentum(FourMomentum([E_1_new,self[0].px,self[0].py,pz_1_new])) + self[1].set_momentum(FourMomentum([E_2_new,self[1].px,self[1].py,pz_2_new])) + self.pop(to_remove) + + elif fks_type and not recoil: + ## Do it in a more FKS-style + if (i == moth[0].get('number')-1): + fks_i = i + fks_j = j + elif (j == moth[0].get('number')-1): + fks_i = j + fks_j = i + to_remove = fks_j + new_event = copy.copy(event) + + if fks_i <= 1: # initial-state recoil + + # First boost to partonic CM frame + q = FourMomentum(self[0])+FourMomentum(self[1]) + for ip,part in enumerate(self): + vec = FourMomentum(part) + self[ip].set_momentum(vec.zboost(pboost=q)) + + k_tot = FourMomentum([self[0].E+self[1].E-self[fks_j].E,self[0].px+self[1].px-self[fks_j].px,\ + self[0].py+self[1].py-self[fks_j].py,self[0].pz+self[1].pz-self[fks_j].pz]) + + final = FourMomentum([0,0,0,0]) + for ip,part in enumerate(self): + vec = FourMomentum([part.E,part.px,part.py,part.pz]) + if (ip != fks_i and ip != fks_j and ip >= 2): + final = final + vec + + s = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz])**2 + ksi = self[fks_j].E/(math.sqrt(s)/2.0) + y = self[fks_j].pz/self[fks_j].E + + self[0].pz = self[0].pz * math.sqrt(1.0-ksi)*math.sqrt((2.0-ksi*(1.0+y))/((2.0-ksi*(1.0-y)))) + self[0].E = math.sqrt(self[0].mass**2 + self[0].pz**2) + 
self[1].pz = self[1].pz * math.sqrt(1.0-ksi)*math.sqrt((2.0-ksi*(1.0-y))/((2.0-ksi*(1.0+y)))) + self[1].E = math.sqrt(self[1].mass**2 + self[1].pz**2) + + final = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz]) + + k_tot_1 = k_tot.zboost(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + k_tot_2 = k_tot_1.pt_boost(pboost=FourMomentum([k_tot_1.E,k_tot_1.px,k_tot_1.py,k_tot_1.pz])) + k_tot_3 = k_tot_2.zboost_inv(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + + for ip,part in enumerate(self): + if (ip >= 2): + vec = FourMomentum([part.E,part.px,part.py,part.pz]) + vec2 = vec.zboost(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + vec3 = vec2.pt_boost(pboost=FourMomentum([k_tot_1.E,k_tot_1.px,k_tot_1.py,k_tot_1.pz])) + vec_new = vec3.zboost_inv(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + self[ip].set_momentum(FourMomentum([vec_new.E,vec_new.px,vec_new.py,vec_new.pz])) + + self.pop(to_remove) + + else: # final-state recoil + q = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz]) + + for ip,part in enumerate(self): + vec = FourMomentum([part.E,part.px,part.py,part.pz]) + self[ip].set_momentum(vec.zboost(pboost=q)) + + q = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz]) + + k = FourMomentum([self[fks_i].E+self[fks_j].E,self[fks_i].px+self[fks_j].px,\ + self[fks_i].py+self[fks_j].py,self[fks_i].pz+self[fks_j].pz]) + + k_rec = FourMomentum([0,0,0,0]) + for ip,part in enumerate(self): + if ip >= 2 and ip != fks_i and ip != fks_j: # add only final-states to the recoil and not the FKS pair + k_rec = k_rec + FourMomentum([part.E,part.px,part.py,part.pz]) + + k_mom = math.sqrt(k_rec.px**2 + k_rec.py**2 + k_rec.pz**2) + beta = (q**2 - (k_rec.E+k_mom)**2)/(q**2 + (k_rec.E+k_mom)**2) + for ip,part in enumerate(self): + if ip >= 2 and ip != fks_i and ip != fks_j: + vec = FourMomentum([self[ip].E,self[ip].px,self[ip].py,self[ip].pz]) + self[ip].set_momentum(vec.boost_beta(beta,k_rec)) + if ip == fks_i: + self[ip].set_momentum(q - k_rec.boost_beta(beta,k_rec)) + self.pop(to_remove) + else: + logger.info('Error in Sudakov Born mapping: no recoil scheme found!') + + def recoil_eq(self,part1, part2): + """ In general, solves the equation + E1 + E2 = K + p1 + p2 = c + E1^2 - p1^2 = a + E2^2 - p2^2 = b + and returns p1 + """ + thresh = 1e-6 + import random + a = part1.mass**2 + part1.px**2 + part1.py**2 + b = part2.mass**2 + part2.px**2 + part2.py**2 + c = part1.pz + part2.pz + K = part1.E + part2.E + K2 = K**2 + sol1 = (-a*c + b*c + c**3 - c*K2 - math.sqrt(K2*(a**2 + (b + c**2 - K2)**2 - 2*a*(b - c**2 + K2))))/(2*(c**2-K2)) + sol2 = (-a*c + b*c + c**3 - c*K2 + math.sqrt(K2*(a**2 + (b + c**2 - K2)**2 - 2*a*(b - c**2 + K2))))/(2*(c**2-K2)) + + if abs(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2) - (math.sqrt(a+sol2**2) + math.sqrt(b+(c-sol2)**2))) > thresh: + logger.critical('Error in recoil_eq solver 1') + logger.critical(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2)) + logger.critical(math.sqrt(a+sol2**2) + math.sqrt(b+(c-sol2)**2)) + if abs(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2) - K) > thresh: + logger.critical('Error in recoil_eq solver 2') + logger.critical(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2)) + logger.critical(K) + return sol1 + + def boost(self, filter=None): """modify the current event to boost it according to the current filter""" if filter is None: @@ -1861,7 
+2129,7 @@ def boost(self, filter=None): if list(filter(p)): pboost += p else: - pboost = FourMomentum(pboost) + pboost = FourMomentum(filter) # change sign of three-component due to helas convention pboost.px *=-1 @@ -1877,7 +2145,7 @@ def check(self): """check various property of the events""" # check that relative error is under control - threshold = 1e-6 + threshold = 1e-4 #1. Check that the 4-momenta are conserved E, px, py, pz = 0,0,0,0 @@ -1920,7 +2188,50 @@ def check(self): self.check_color_structure() #3. check mass - + + def check_kinematics_only(self): + """check various property of the events - only kinematics""" + + # check that relative error is under control + threshold = 1e-3 + + #1. Check that the 4-momenta are conserved + E, px, py, pz = 0,0,0,0 + absE, abspx, abspy, abspz = 0,0,0,0 + for particle in self: + coeff = 1 + if particle.status == -1: + coeff = -1 + elif particle.status != 1: + continue + E += coeff * particle.E + absE += abs(particle.E) + px += coeff * particle.px + py += coeff * particle.py + pz += coeff * particle.pz + abspx += abs(particle.px) + abspy += abs(particle.py) + abspz += abs(particle.pz) + # check mass + fourmass = FourMomentum(particle).mass + + if particle.mass and (abs(particle.mass) - fourmass)/ abs(particle.mass) > threshold: + logger.critical(self) + raise Exception( "Do not have correct mass lhe: %s momentum: %s (error at %s" % (particle.mass, fourmass, (abs(particle.mass) - fourmass)/ abs(particle.mass))) + + if abs(E/absE) > threshold: + logger.critical(self) + raise Exception("Do not conserve Energy %s, %s" % (E/absE, E)) + if abs(px/abspx) > threshold: + logger.critical(self) + raise Exception("Do not conserve Px %s, %s" % (px/abspx, px)) + if abs(py/abspy) > threshold: + logger.critical(self) + raise Exception("Do not conserve Py %s, %s" % (py/abspy, py)) + if abs(pz/abspz) > threshold: + logger.critical(self) + raise Exception("Do not conserve Pz %s, %s" % (pz/abspz, pz)) + def assign_scale_line(self, line, convert=True): """read the line corresponding to global event line @@ -2764,7 +3075,7 @@ def zboost(self, pboost=None, E=0, pz=0): if isinstance(pboost, FourMomentum): E = pboost.E pz = pboost.pz - + #beta = pz/E gamma = E / math.sqrt(E**2-pz**2) gammabeta = pz / math.sqrt(E**2-pz**2) @@ -2778,6 +3089,74 @@ def zboost(self, pboost=None, E=0, pz=0): out.pz = 0 return out + def zboost_inv(self, pboost=None, E=0, pz=0): + """Both momenta should be in the same frame. + The boost perform correspond to the boost required to set pboost at + rest (only z boost applied). + """ + if isinstance(pboost, FourMomentum): + E = pboost.E + pz = pboost.pz + + #beta = pz/E + gamma = E / math.sqrt(E**2-pz**2) + gammabeta = pz / math.sqrt(E**2-pz**2) + + out = FourMomentum([gamma*self.E + gammabeta*self.pz, + self.px, + self.py, + gamma*self.pz + gammabeta*self.E]) + + if abs(out.pz) < 1e-6 * out.E: + out.pz = 0 + return out + + + def pt_boost(self, pboost=None, E=0, pz=0): + """Both momenta should be in the same frame. + The boost perform correspond to the boost required to set pboost at + rest (only pT boost applied). 
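        Note that, unlike zboost, only E and the transverse components
        (px, py) mix under this boost; pz is returned unchanged.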
+ """ + + if isinstance(pboost, FourMomentum): + E = pboost.E + px = pboost.px + py = pboost.py + mass = math.sqrt(E**2 - px**2 - py**2) + + betax = px/E + betay = py/E + beta = math.sqrt(betax**2+betay**2) + gamma = 1 / math.sqrt(1.0-beta**2) + + out = FourMomentum([gamma*self.E - gamma*betax*self.px - gamma*betay*self.py, + -gamma*betax*self.E + (1.0 + (gamma-1.0)*betax**2/(beta**2))*self.px + (gamma-1.0)*betax*betay/(beta**2)*self.py, + -gamma*betay*self.E + ((gamma-1.0)*betax*betay/(beta**2))*self.px + (1.0+(gamma-1.0)*(betay**2)/(beta**2))*self.py, + self.pz]) + + if abs(out.px) < 1e-6 * out.E: + out.px = 0 + if abs(out.py) < 1e-6 * out.E: + out.py = 0 + return out + + def boost_beta(self,beta,mom): + """ Boost along the three-momentum of mom with a boost of size beta""" + + unit = mom * (1.0/math.sqrt(mom.px**2+mom.py**2+mom.pz**2)) + beta_vec = beta*unit + bx = beta_vec.px + by = beta_vec.py + bz = beta_vec.pz + gamma = 1.0 / math.sqrt(1.0-beta**2) + + out = FourMomentum([gamma*self.E - gamma*bx*self.px - gamma*by*self.py - gamma*bz*self.pz, + -gamma*bx*self.E + (1.0 + (gamma-1.0)*bx**2/(beta**2))*self.px + (gamma-1.0)*bx*by/(beta**2)*self.py + (gamma-1.0)*bx*bz/(beta**2)*self.pz, + -gamma*by*self.E + ((gamma-1.0)*bx*by/(beta**2))*self.px + (1.0+(gamma-1.0)*(by**2)/(beta**2))*self.py + (gamma-1.0)*by*bz/(beta**2)*self.pz, + -gamma*bz*self.E + (gamma-1.0)*bx*bz/(beta**2)*self.px + (gamma-1.0)*(by*bz)/(beta**2)*self.py + (1.0+(gamma-1.0)*bz**2/(beta**2))*self.pz]) + + return out + def boost_to_restframe(self, pboost): """apply the boost transformation such that pboost is at rest in the new frame. First apply a rotation to allign the pboost to the z axis and then use @@ -2789,27 +3168,64 @@ def boost_to_restframe(self, pboost): return out - # write pboost as (E, p cosT sinF, p sinT sinF, p cosF) - # rotation such that it become (E, 0 , 0 , p ) is - # cosT sinF , -sinT , cosT sinF - # sinT cosF , cosT , sinT sinF - # -sinT , 0 , cosF - p = math.sqrt( pboost.px**2 + pboost.py**2+ pboost.pz**2) - cosF = pboost.pz / p - sinF = math.sqrt(1-cosF**2) - sinT = pboost.py/p/sinF - cosT = pboost.px/p/sinF - - out=FourMomentum([self.E, - self.px*cosT*cosF + self.py*sinT*cosF-self.pz*sinF, - -self.px*sinT+ self.py*cosT, - self.px*cosT*sinF + self.py*sinT*sinF + self.pz*cosF - ]) - out = out.zboost(E=pboost.E,pz=p) + # see here https://physics.stackexchange.com/questions/749036/general-lorentz-boost-of-four-momentum-in-cm-frame-particle-physics + vx = pboost.px/pboost.E + vy = pboost.py/pboost.E + vz = pboost.pz/pboost.E + v = pboost.norm/pboost.E + v2 = pboost.norm_sq/pboost.E**2 + gamma = 1./math.sqrt(1.-v**2) + gammo = gamma-1. 
+ out = FourMomentum(E = gamma*(self.E - vx*self.px - vy*self.py - vz*self.pz), + px= -gamma*vx*self.E + (1+gammo*vx**2/v2)*self.px + gammo*vx*vy/v2*self.py + gammo*vx*vz/v2*self.pz, + py= -gamma*vy*self.E + gammo*vy*vx/v2*self.px + (1+gammo*vy**2/v2)*self.py + gammo*vy*vz/v2*self.pz, + pz= -gamma*vz*self.E + gammo*vz*vx/v2*self.px + gammo*vz*vy/v2*self.py + (1+gammo*vz**2/v2)*self.pz) + return out + def rotate_to_z(self,prot): + + import math + import numpy as np + + z = np.array([0.,0.,1.]) + + px = self.px + py = self.py + pz = self.pz + + refx = prot.px + refy = prot.py + refz = prot.pz + + prot_mom = np.array([px, py, pz]) + ref_mom = np.array([refx, refy, refz]) + + # Create normal vector + n = np.array([refy, -refx, 0.]) + n = n * 1./math.sqrt(self.threedot(n,n)) + t = prot_mom - self.threedot(n,prot_mom)*n + p = ref_mom - self.threedot(ref_mom,z)*z + p = p/math.sqrt(self.threedot(p,p)) + + t_pz = np.array([self.threedot(t,p), self.threedot(t,z), 0.]) + costheta = self.threedot(ref_mom,z)* 1./math.sqrt(self.threedot(ref_mom, ref_mom)) + sintheta=math.sqrt(1.-costheta**2) + + sgn = 1. + t_pz_p = np.array([0., 0., 0.]) + t_pz_p[0] = costheta*t_pz[0] + sgn*(-sintheta) * t_pz[1] + t_pz_p[1] = sgn*sintheta*t_pz[0] + costheta * t_pz[1] + + out_mom = self.threedot(n,prot_mom)*n + t_pz_p[0]*p + t_pz_p[1]*z + + out = FourMomentum([self.E,out_mom[0], out_mom[1], out_mom[2] ] ) + + return out - + def threedot(self,a,b): + + return a[0]*b[0]+a[1]*b[1]+a[2]*b[2] class OneNLOWeight(object): diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/madevent_interface.py b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/madevent_interface.py index 2a118e21bf..8e30cf690c 100755 --- a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/madevent_interface.py +++ b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/madevent_interface.py @@ -496,7 +496,6 @@ def help_remove(self): logger.info(" the optional '-f' allows to by-pass all security question") logger.info(" The banner can be remove only if all files are removed first.") - class AskRun(cmd.ControlSwitch): """a class for the question on what to do on a madevent run""" @@ -2393,13 +2392,17 @@ def do_generate_events(self, line): # Check argument's validity mode = self.check_generate_events(args) switch_mode = self.ask_run_configuration(mode, args) - if not args: - # No run name assigned -> assigned one automaticaly - self.set_run_name(self.find_available_run_name(self.me_dir), None, 'parton') - else: - self.set_run_name(args[0], None, 'parton', True) - args.pop(0) - + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + run_card = banner_mod.RunCard(pjoin(self.me_dir, 'Cards', 'run_card.dat'), consistency=False) + if not run_card.scan_set: + if not args: + # No run name assigned -> assigned one automaticaly + self.set_run_name(self.find_available_run_name(self.me_dir), None, 'parton') + else: + self.set_run_name(args[0], None, 'parton', True) + args.pop(0) + + self.run_generate_events(switch_mode, args) self.postprocessing() @@ -2560,7 +2563,7 @@ def wait_monitoring(Idle, Running, Done): self.update_status("postprocessing contur done", level="rivet") # this decorator handle the loop related to scan. 
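For orientation, the scan decorator referred to in the comment above wraps the run method in a loop over scan points. A minimal sketch of that pattern follows, assuming hypothetical helpers (scan_points, apply_scan_point) rather than MG5aMC's actual common_run.scanparamcardhandling internals:

import functools

def scan_handler(run_card_scan=False):
    """Sketch of a scan decorator: re-run the wrapped method once per scan
    point. Illustrative only, not the real scanparamcardhandling code."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            # With run_card_scan=True the points would be parsed from the
            # run_card, otherwise from the param_card. Hypothetical attribute:
            points = getattr(self, 'scan_points', None) or [None]
            for point in points:
                if point is not None:
                    self.apply_scan_point(point)  # hypothetical card rewrite
                func(self, *args, **kwargs)
        return wrapper
    return decorator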
- @common_run.scanparamcardhandling() + @common_run.scanparamcardhandling(run_card_scan=True) def run_generate_events(self, switch_mode, args): if self.proc_characteristics['loop_induced'] and self.options['run_mode']==0: @@ -2593,7 +2596,6 @@ def run_generate_events(self, switch_mode, args): # Regular run mode logger.info('Generating %s events with run name %s' % (self.run_card['nevents'], self.run_name)) - self.exec_cmd('survey %s %s' % (self.run_name,' '.join(args)), postcmd=False) nb_event = self.run_card['nevents'] @@ -2975,7 +2977,7 @@ def update_width_in_param_card(decay_info, initial=None, output=None): particle = 0 # Read BRs for this decay line = param_card[line_number] - while re.search('^(#|\s|\d)', line): + while re.search(r'^(#|\s|\d)', line): line = param_card.pop(line_number) if not particle or line.startswith('#'): line=param_card[line_number] @@ -3226,7 +3228,7 @@ def do_treatcards(self, line, mode=None, opt=None): for line in open(pjoin(bias_module_path,'%s.f'%os.path.basename(bias_module_path))): if start and last: break - if not start and not re.search('c\s*parameters\s*=\s*{',line, re.I): + if not start and not re.search(r'c\s*parameters\s*=\s*{',line, re.I): continue start = True if not line.startswith('C'): @@ -3235,7 +3237,7 @@ def do_treatcards(self, line, mode=None, opt=None): if '{' in line: line = line.split('{')[-1] # split for } ! # - split_result = re.split('(\}|!|\#)', line,1, re.M) + split_result = re.split(r'(\}|!|\#)', line,1, re.M) line = split_result[0] sep = split_result[1] if len(split_result)>1 else None if sep == '}': @@ -3514,8 +3516,8 @@ def pass_in_difficult_integration_mode(self, rate=1): text = open(conf_path).read() min_evt, max_evt = 2500 *(2+rate), 10000*(rate+1) - text = re.sub('''\(min_events = \d+\)''', '(min_events = %i )' % min_evt, text) - text = re.sub('''\(max_events = \d+\)''', '(max_events = %i )' % max_evt, text) + text = re.sub(r'''\(min_events = \d+\)''', '(min_events = %i )' % min_evt, text) + text = re.sub(r'''\(max_events = \d+\)''', '(max_events = %i )' % max_evt, text) fsock = open(conf_path, 'w') fsock.write(text) fsock.close() @@ -3619,7 +3621,7 @@ def do_refine(self, line): alljobs = misc.glob('ajob*', Pdir) #remove associated results.dat (ensure to not mix with all data) - Gre = re.compile("\s*j=(G[\d\.\w]+)") + Gre = re.compile(r"\s*j=(G[\d\.\w]+)") for job in alljobs: Gdirs = Gre.findall(open(job).read()) for Gdir in Gdirs: @@ -3727,58 +3729,126 @@ def do_combine_events(self, line): sum_xsec, sum_xerru, sum_axsec = 0,[],0 Gdirs = self.get_Gdir() Gdirs.sort() - for Gdir in Gdirs: - if os.path.exists(pjoin(Gdir, 'events.lhe')): - result = sum_html.OneResult('') - result.read_results(pjoin(Gdir, 'results.dat')) - sum_xsec += result.get('xsec') - sum_xerru.append(result.get('xerru')) - sum_axsec += result.get('axsec') - - if self.run_card['gridpack'] or self.run_card['nevents']==0: - os.remove(pjoin(Gdir, 'events.lhe')) - continue + partials_info = [] + try: + p = subprocess.Popen(["ulimit", "-n"], stdout=subprocess.PIPE) + out, err = p.communicate() + max_G = out.decode() + if max_G == "unlimited": + max_G =2500 + else: + max_G = int(max_G) - 40 + except Exception as error: + logger.debug(error) + max_G = 80 # max(20, len(Gdirs)/self.options['nb_core']) - AllEvent.add(pjoin(Gdir, 'events.lhe'), - result.get('xsec'), - result.get('xerru'), - result.get('axsec') - ) - - if len(AllEvent) >= 80: #perform a partial unweighting - AllEvent.unweight(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % partials), - 
get_wgt, log_level=5, trunc_error=1e-2, event_target=self.run_card['nevents']) - AllEvent = lhe_parser.MultiEventFile() - AllEvent.banner = self.banner - AllEvent.add(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % partials), - sum_xsec, - math.sqrt(sum(x**2 for x in sum_xerru)), - sum_axsec) - partials +=1 - if not hasattr(self,'proc_characteristic'): self.proc_characteristic = self.get_characteristics() - if len(AllEvent) == 0: - nb_event = 0 - else: + mycluster = cluster.MultiCore(nb_core=self.options['nb_core']) + + def split(a, n): + """split a list "a" into n chunk of same size (or nearly same size)""" + k, m = divmod(len(a), n) + return (a[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n)) + + partials_info = [] + if len(Gdirs) >= max_G: + start_unweight= time.perf_counter() + # first check in how many chunk we have to split (always use a multiple of nb_core) + nb_split = 1 + nb_G = len(Gdirs) // (2* self.options['nb_core']) + while nb_G > min(80, max_G): + nb_split += 1 + nb_G = len(Gdirs)//(nb_split*2*self.options['nb_core']) + if nb_G < 10: + nb_split -=1 + nb_G = len(Gdirs)//(nb_split*2*self.options['nb_core']) + + #enforce at least 10 directory per thread + if nb_G > 10 or nb_split>1: + # do the unweighting of each chunk on their own thread + nb_chunk = (nb_split*2*self.options['nb_core']) + else: + nb_chunk = len(Gdirs) // 10 + nb_G =10 + + # security that the number of combine events is too large + if nb_chunk >= max_G: + nb_chunk = max_G -1 + nb_G = len(Gdirs) // nb_chunk + + for i, local_G in enumerate(split(Gdirs, nb_chunk)): + line = [pjoin(self.me_dir, "Events", self.run_name, "partials%d.lhe.gz" % i)] + line.append(pjoin(self.me_dir, 'Events', self.run_name, '%s_%s_banner.txt' % (self.run_name, tag))) + line.append(str(self.results.current['cross'])) + line += local_G + partials_info.append(self.do_combine_events_partial(' '.join(line), preprocess_only=True)) + mycluster.submit(sys.executable, + [pjoin(self.me_dir, 'bin', 'internal', 'madevent_interface.py'), 'combine_events_partial'] + line, + stdout='/dev/null' + ) + + starttime = time.time() + update_status = lambda idle, run, finish: \ + self.update_status((idle, run, finish, 'unweight'), level=None, + force=False, starttime=starttime) + mycluster.wait(self.me_dir, update_status) + # do the final combination + for data in partials_info: + AllEvent.add(*data) + + start_unweight= time.perf_counter() nb_event = AllEvent.unweight(pjoin(self.me_dir, "Events", self.run_name, "unweighted_events.lhe.gz"), get_wgt, trunc_error=1e-2, event_target=self.run_card['nevents'], log_level=logging.DEBUG, normalization=self.run_card['event_norm'], proc_charac=self.proc_characteristic) + + #cleaning + for data in partials_info: + path = data[0] + try: + os.remove(path) + except Exception as error: + try: + os.remove(path[:-3]) # try without the .gz + except: + misc.sprint('no file ', path, 'to clean') + else: + for Gdir in Gdirs: + if os.path.exists(pjoin(Gdir, 'events.lhe')): + result = sum_html.OneResult('') + result.read_results(pjoin(Gdir, 'results.dat')) + sum_xsec += result.get('xsec') + sum_xerru.append(result.get('xerru')) + sum_axsec += result.get('axsec') + + if self.run_card['gridpack'] or self.run_card['nevents']==0: + os.remove(pjoin(Gdir, 'events.lhe')) + continue + + AllEvent.add(pjoin(Gdir, 'events.lhe'), + result.get('xsec'), + result.get('xerru'), + result.get('axsec') + ) + + if len(AllEvent) == 0: + nb_event = 0 + else: + nb_event = AllEvent.unweight(pjoin(self.me_dir, "Events", self.run_name, 
"unweighted_events.lhe.gz"), + get_wgt, trunc_error=1e-2, event_target=self.run_card['nevents'], + log_level=logging.DEBUG, normalization=self.run_card['event_norm'], + proc_charac=self.proc_characteristic) + + if nb_event < self.run_card['nevents']: + logger.warning("failed to generate enough events. Please follow one of the following suggestions to fix the issue:") + logger.warning(" - set in the run_card.dat 'sde_strategy' to %s", 1 + self.run_card['sde_strategy'] % 2) + logger.warning(" - set in the run_card.dat 'hard_survey' to 1 or 2.") + logger.warning(" - reduce the number of requested events (if set too high)") + logger.warning(" - check that you do not have -integrable- singularity in your amplitude.") + logger.warning(" - regenerate your process directory by selecting another gauge (in particular try FD gauge).") - if nb_event < self.run_card['nevents']: - logger.warning("failed to generate enough events. Please follow one of the following suggestions to fix the issue:") - logger.warning(" - set in the run_card.dat 'sde_strategy' to %s", 1 + self.run_card['sde_strategy'] % 2) - logger.warning(" - set in the run_card.dat 'hard_survey' to 1 or 2.") - logger.warning(" - reduce the number of requested events (if set too high)") - logger.warning(" - check that you do not have -integrable- singularity in your amplitude.") - if partials: - for i in range(partials): - try: - os.remove(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % i)) - except Exception: - os.remove(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe" % i)) self.results.add_detail('nb_event', nb_event) @@ -3789,7 +3859,50 @@ def do_combine_events(self, line): logger.info("combination of events done in %s s ", time.time()-start) self.to_store.append('event') + + ############################################################################ + def do_combine_events_partial(self, line, preprocess_only=False): + """ """ + + AllEvent = lhe_parser.MultiEventFile() + + sum_xsec, sum_xerru, sum_axsec = 0,[],0 + output, banner_path,cross = line.split()[:3] + Gdirs = line.split()[3:] + + cross = float(cross) + if not self.banner: + self.banner = banner_mod.Banner(banner_path) + if not hasattr(self, 'run_card'): + self.run_card = banner_mod.RunCard(self.banner['mgruncard']) + AllEvent.banner = self.banner + + for Gdir in Gdirs: + if os.path.exists(pjoin(Gdir, 'events.lhe')): + result = sum_html.OneResult('') + result.read_results(pjoin(Gdir, 'results.dat')) + sum_xsec += result.get('xsec') + sum_xerru.append(result.get('xerru')) + sum_axsec += result.get('axsec') + + if self.run_card['gridpack'] or self.run_card['nevents']==0: + os.remove(pjoin(Gdir, 'events.lhe')) + continue + if not preprocess_only: + AllEvent.add(pjoin(Gdir, 'events.lhe'), + result.get('xsec'), + result.get('xerru'), + result.get('axsec') + ) + if preprocess_only: + return output, sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), sum_axsec + nb_event = max(min(abs(1.01*self.run_card['nevents']*sum_axsec/cross),self.run_card['nevents']), 10) + get_wgt = lambda event: event.wgt + AllEvent.unweight(output, + get_wgt, log_level=5, trunc_error=1e-2, event_target=nb_event) + return output, sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), sum_axsec + ############################################################################ def correct_bias(self): """check the first event and correct the weight by the bias @@ -3902,13 +4015,19 @@ def do_store_events(self, line): #except Exception: # continue # Store log - try: - if 
os.path.exists(pjoin(G_path, 'log.txt')): - input = pjoin(G_path, 'log.txt') + input = pjoin(G_path, 'log.txt') + if os.path.exists(input): + if self.run_card['keep_log'] not in ["none", "minimal"]: output = pjoin(G_path, '%s_log.txt' % run) - files.mv(input, output) - except Exception: - continue + try: + files.mv(input, output) + except Exception: + continue + elif self.run_card['keep_log'] == "none": + try: + os.remove(input) + except Exception: + continue #try: # # Grid # for name in ['ftn26']: @@ -3989,7 +4108,7 @@ def do_create_gridpack(self, line): misc.call(['./bin/internal/make_gridpack'], cwd=self.me_dir) files.mv(pjoin(self.me_dir, 'gridpack.tar.gz'), pjoin(self.me_dir, '%s_gridpack.tar.gz' % self.run_name)) - os.system("sed -i.bak \"s/\s*.true.*=.*GridRun/ .false. = GridRun/g\" %s/Cards/grid_card.dat" \ + os.system("sed -i.bak \"s/\\s*.true.*=.*GridRun/ .false. = GridRun/g\" %s/Cards/grid_card.dat" \ % self.me_dir) self.update_status('gridpack created', level='gridpack') @@ -4476,7 +4595,7 @@ def do_pythia8(self, line): else: preamble = misc.get_HEPTools_location_setter( pjoin(MG5DIR,'HEPTools'),'lib') - preamble += "\n unset PYTHIA8DATA\n" + #preamble += "\n unset PYTHIA8DATA\n" open(pythia_cmd_card,'w').write("""! ! It is possible to run this card manually with: @@ -4691,7 +4810,7 @@ def do_pythia8(self, line): # Make sure to sure the number of split_events determined during the splitting. split_PY8_Card.systemSet('Main:numberOfEvents',partition_for_PY8[i]) split_PY8_Card.systemSet('HEPMCoutput:scaling',split_PY8_Card['HEPMCoutput:scaling']* - (float(partition_for_PY8[i])/float(n_events))) + (float(partition_for_PY8[i]))) # Add_missing set to False so as to be sure not to add any additional parameter w.r.t # the ones in the original PY8 param_card copied. split_PY8_Card.write(pjoin(parallelization_dir,'PY8Card_%d.dat'%i), @@ -4963,9 +5082,9 @@ def wait_monitoring(Idle, Running, Done): if cross_sections: # Filter the cross_sections specified an keep only the ones # with central parameters and a different merging scale - a_float_re = '[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' + a_float_re = r'[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' central_merging_re = re.compile( - '^\s*Weight_MERGING\s*=\s*(?P%s)\s*$'%a_float_re, + r'^\s*Weight_MERGING\s*=\s*(?P%s)\s*$'%a_float_re, re.IGNORECASE) cross_sections = dict( (float(central_merging_re.match(xsec).group('merging')),value) @@ -5016,8 +5135,8 @@ def wait_monitoring(Idle, Running, Done): def parse_PY8_log_file(self, log_file_path): """ Parse a log file to extract number of event and cross-section. 
""" - pythiare = re.compile("Les Houches User Process\(es\)\s*\d+\s*\|\s*(?P\d+)\s*(?P\d+)\s*(?P\d+)\s*\|\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") - pythia_xsec_re = re.compile("Inclusive cross section\s*:\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") + pythiare = re.compile(r"Les Houches User Process\(es\)\s*\d+\s*\|\s*(?P\d+)\s*(?P\d+)\s*(?P\d+)\s*\|\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") + pythia_xsec_re = re.compile(r"Inclusive cross section\s*:\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") sigma_m, Nacc, Ntry = None, None, None for line in misc.BackRead(log_file_path): info = pythiare.search(line) @@ -5158,7 +5277,7 @@ def do_pythia(self, line): # read the line from the bottom of the file #pythia_log = misc.BackRead(pjoin(self.me_dir,'Events', self.run_name, # '%s_pythia.log' % tag)) - pythiare = re.compile("\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") + pythiare = re.compile(r"\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") for line in misc.reverse_readline(pjoin(self.me_dir,'Events', self.run_name, '%s_pythia.log' % tag)): info = pythiare.search(line) @@ -5619,8 +5738,8 @@ def launch_job(self,exe, cwd=None, stdout=None, argument = [], remaining=0, input_files.append(self.get_pdf_input_filename()) #Find the correct ajob - Gre = re.compile("\s*j=(G[\d\.\w]+)") - origre = re.compile("grid_directory=(G[\d\.\w]+)") + Gre = re.compile(r"\s*j=(G[\d\.\w]+)") + origre = re.compile(r"grid_directory=(G[\d\.\w]+)") try : fsock = open(exe) except Exception: @@ -5628,7 +5747,7 @@ def launch_job(self,exe, cwd=None, stdout=None, argument = [], remaining=0, text = fsock.read() output_files = Gre.findall(text) if not output_files: - Ire = re.compile("for i in ([\d\.\s]*) ; do") + Ire = re.compile(r"for i in ([\d\.\s]*) ; do") data = Ire.findall(text) data = ' '.join(data).split() for nb in data: @@ -6035,7 +6154,7 @@ def get_last_tag(self, level): 'syscalc':[], 'rivet':['rivet']} - if name == self.run_name: + if name and name == self.run_name: if reload_card: run_card = pjoin(self.me_dir, 'Cards','run_card.dat') self.run_card = banner_mod.RunCard(run_card) @@ -6334,7 +6453,7 @@ def run_syscalc(self, mode='parton', event_path=None, output=None): elif mode == 'Pythia': stdout = open(pjoin(event_dir, self.run_name, '%s_%s_syscalc.log' % (tag,mode)),'w') if 'mgpythiacard' in self.banner: - pat = re.compile('''^\s*qcut\s*=\s*([\+\-\d.e]*)''', re.M+re.I) + pat = re.compile(r'''^\s*qcut\s*=\s*([\+\-\d.e]*)''', re.M+re.I) data = pat.search(self.banner['mgpythiacard']) if data: qcut = float(data.group(1)) @@ -6611,7 +6730,7 @@ def get_subP_ids(path): for line in open(pjoin(path, 'leshouche.inc')): if not 'IDUP' in line: continue - particles = re.search("/([\d,-]+)/", line) + particles = re.search(r"/([\d,-]+)/", line) all_ids.append([int(p) for p in particles.group(1).split(',')]) return all_ids @@ -6899,6 +7018,7 @@ def do_combine_events(self, line): partials = 0 # if too many file make some partial unweighting sum_xsec, sum_xerru, sum_axsec = 0,[],0 + partials_info = [] Gdirs = self.get_Gdir() Gdirs.sort() for Gdir in Gdirs: @@ -6917,16 +7037,21 @@ def do_combine_events(self, line): sum_axsec += result.get('axsec')*gscalefact[Gdir] if len(AllEvent) >= 80: #perform a partial unweighting + nb_event = min(abs(1.01*self.nb_event*sum_axsec/self.results.current['cross']),self.run_card['nevents']) AllEvent.unweight(pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), - get_wgt, log_level=5, trunc_error=1e-2, event_target=self.nb_event) + 
get_wgt, log_level=5, trunc_error=1e-2, event_target=nb_event) AllEvent = lhe_parser.MultiEventFile() AllEvent.banner = self.banner - AllEvent.add(pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), + partials_info.append((pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), - sum_axsec) + sum_axsec) ) + sum_xsec, sum_xerru, sum_axsec = 0,[],0 partials +=1 + for data in partials_info: + AllEvent.add(*data) + if not hasattr(self,'proc_characteristic'): self.proc_characteristic = self.get_characteristics() diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/misc.py b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/misc.py index c4c669f36b..e7fd60be0d 100755 --- a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/misc.py +++ b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/misc.py @@ -67,7 +67,7 @@ def parse_info_str(fsock): """ info_dict = {} - pattern = re.compile("(?P\w*)\s*=\s*(?P.*)", + pattern = re.compile(r"(?P\w*)\s*=\s*(?P.*)", re.IGNORECASE | re.VERBOSE) for entry in fsock: entry = entry.strip() @@ -84,7 +84,7 @@ def parse_info_str(fsock): def glob(name, path=''): """call to glob.glob with automatic security on path""" import glob as glob_module - path = re.sub('(?P\?|\*|\[|\])', '[\g]', path) + path = re.sub(r'(?P\?|\*|\[|\])', r'[\g]', path) return glob_module.glob(pjoin(path, name)) #=============================================================================== @@ -614,10 +614,10 @@ def mod_compilator(directory, new='gfortran', current=None, compiler_type='gfort #search file file_to_change=find_makefile_in_dir(directory) if compiler_type == 'gfortran': - comp_re = re.compile('^(\s*)FC\s*=\s*(.+)\s*$') + comp_re = re.compile(r'^(\s*)FC\s*=\s*(.+)\s*$') var = 'FC' elif compiler_type == 'cpp': - comp_re = re.compile('^(\s*)CXX\s*=\s*(.+)\s*$') + comp_re = re.compile(r'^(\s*)CXX\s*=\s*(.+)\s*$') var = 'CXX' else: MadGraph5Error, 'Unknown compiler type: %s' % compiler_type @@ -861,9 +861,9 @@ def detect_current_compiler(path, compiler_type='fortran'): # comp = re.compile("^\s*FC\s*=\s*(\w+)\s*") # The regular expression below allows for compiler definition with absolute path if compiler_type == 'fortran': - comp = re.compile("^\s*FC\s*=\s*([\w\/\\.\-]+)\s*") + comp = re.compile("^\\s*FC\\s*=\\s*([\\w\\/\\.\\-]+)\\s*") elif compiler_type == 'cpp': - comp = re.compile("^\s*CXX\s*=\s*([\w\/\\.\-]+)\s*") + comp = re.compile("^\\s*CXX\\s*=\\s*([\\w\\/\\.\\-]+)\\s*") else: MadGraph5Error, 'Unknown compiler type: %s' % compiler_type @@ -1001,7 +1001,19 @@ def call_stdout(arg, *args, **opt): def copytree(src, dst, symlinks = False, ignore = None): if not os.path.exists(dst): os.makedirs(dst) - shutil.copystat(src, dst) + try: + shutil.copystat(src, dst) + except PermissionError: + if os.path.realpath(src).startswith('/cvmfs') and os.path.realpath(dst).startswith('/afs'): + # allowing missmatch from cvmfs to afs since sounds to not create issue --at least in general-- + logger.critical(f'Ignoring that we could not copy permissions from {src} to {dst}') + else: + logger.critical(f'Permission error detected from {src} to {dst}.\n'+\ + 'If you are using WSL with windows partition, please try using python3.12\n'+\ + 'or avoid moving your data from the WSL partition to the UNIX one') + # we do not have enough experience in WSL to allow it to get trough. + raise + lst = os.listdir(src) if ignore: excl = ignore(src, lst) @@ -1895,12 +1907,12 @@ class EasterEgg(object): May4_banner = "* _____ *\n" + \ "* ,-~\" \"~-. 
*\n" + \ "* * ,^ ___ ^. * *\n" + \ - "* * / .^ ^. \ * *\n" + \ + "* * / .^ ^. \\ * *\n" + \ "* * Y l o ! Y * *\n" + \ "* * l_ `.___.' _,[ * *\n" + \ "* * |^~\"--------------~\"\"^| * *\n" + \ "* * ! May the 4th ! * *\n" + \ - "* * \ / * *\n" + \ + "* * \\ / * *\n" + \ "* * ^. .^ * *\n" + \ "* * \"-.._____.,-\" * *\n" @@ -1909,13 +1921,13 @@ class EasterEgg(object): "* M::::::::::M M::::::::::M *\n" + \ "* M:::::::::::M M:::::::::::M (_)___ *\n" + \ "* M:::::::M::::M M::::M:::::::M | / __| *\n" + \ - "* M::::::M M::::M M::::M M::::::M | \__ \ *\n" + \ + "* M::::::M M::::M M::::M M::::::M | \\__ \\ *\n" + \ "* M::::::M M::::M::::M M::::::M |_|___/ *\n" + \ "* M::::::M M:::::::M M::::::M *\n" + \ "* M::::::M M:::::M M::::::M / _| ___ _ __ *\n" + \ - "* M::::::M MMMMM M::::::M | |_ / _ \| '__| *\n" + \ + "* M::::::M MMMMM M::::::M | |_ / _ \\| '__| *\n" + \ "* M::::::M M::::::M | _| (_) | | *\n" + \ - "* M::::::M M::::::M |_/\/\___/|_| *\n" + \ + "* M::::::M M::::::M |_/\\/\\___/|_| *\n" + \ "* M::::::M M::::::M *\n" + \ "* MMMMMMMM MMMMMMMM *\n" + \ "* *\n" + \ @@ -2233,39 +2245,51 @@ def import_python_lhapdf(lhapdfconfig): os.environ['LD_LIBRARY_PATH'] = lhapdf_libdir else: os.environ['LD_LIBRARY_PATH'] = '%s:%s' %(lhapdf_libdir,os.environ['LD_LIBRARY_PATH']) - try: candidates=[dirname for dirname in os.listdir(lhapdf_libdir) \ if os.path.isdir(os.path.join(lhapdf_libdir,dirname))] except OSError: candidates=[] + if os.path.isdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')): + candidates += [pjoin(os.pardir,'local', 'lib', dirname) for dirname in os.listdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')) + if os.path.isdir(os.path.join(lhapdf_libdir,os.pardir, 'local', 'lib', dirname))] for candidate in candidates: - if os.path.isdir(os.path.join(lhapdf_libdir,candidate,'site-packages')): - sys.path.insert(0,os.path.join(lhapdf_libdir,candidate,'site-packages')) - try: - import lhapdf - use_lhapdf=True - break - except ImportError: - sys.path.pop(0) - continue + for subdir in ['site-packages', 'dist-packages']: + if os.path.isdir(os.path.join(lhapdf_libdir,candidate, subdir)): + sys.path.insert(0,os.path.join(lhapdf_libdir,candidate, subdir)) + try: + import lhapdf + use_lhapdf=True + break + except ImportError as error: + sys.path.pop(0) + continue + else: + continue + break if not use_lhapdf: try: candidates=[dirname for dirname in os.listdir(lhapdf_libdir+'64') \ if os.path.isdir(os.path.join(lhapdf_libdir+'64',dirname))] except OSError: candidates=[] - + if os.path.isdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib64')): + candidates += [pjoin(os.pardir,'local', 'lib64', dirname) for dirname in os.listdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')) + if os.path.isdir(os.path.join(lhapdf_libdir,os.pardir, 'local', 'lib64', dirname))] for candidate in candidates: - if os.path.isdir(os.path.join(lhapdf_libdir+'64',candidate,'site-packages')): - sys.path.insert(0,os.path.join(lhapdf_libdir+'64',candidate,'site-packages')) - try: - import lhapdf - use_lhapdf=True - break - except ImportError: - sys.path.pop(0) - continue + for subdir in ['site-packages', 'dist-packages']: + if os.path.isdir(os.path.join(lhapdf_libdir+'64',candidate, subdir)): + sys.path.insert(0,os.path.join(lhapdf_libdir+'64',candidate, subdir)) + try: + import lhapdf + use_lhapdf=True + break + except ImportError as error: + sys.path.pop(0) + continue + else: + continue + break if not use_lhapdf: try: import lhapdf diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/shower_card.py 
b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/shower_card.py index e87d534177..16ed72b1a0 100755 --- a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/shower_card.py +++ b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/shower_card.py @@ -288,7 +288,7 @@ def write(self, output_file, template=None, python_template=False, self.text = open(template,'r').read() - key_re = re.compile('^(\s*)([\S^#]+)(\s*)=(\s*)([^#]*?)(\s*)(\#.*|$)') + key_re = re.compile(r'^(\s*)([\S^#]+)(\s*)=(\s*)([^#]*?)(\s*)(\#.*|$)') newlines = [] for line in self.text.split('\n'): key_match = key_re.findall(line) diff --git a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/systematics.py b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/systematics.py index 28eaed00e2..2acebd087c 100644 --- a/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/systematics.py +++ b/epochX/cudacpp/smeft_gg_tttt.mad/bin/internal/systematics.py @@ -582,7 +582,7 @@ def print_cross_sections(self, all_cross, nb_event, stdout): else: resume.write( '#PDF %s: %g +%2.3g%% -%2.3g%%\n' % (pdfset.name, pdferr.central,pdferr.errplus*100/all_cross[0], pdferr.errminus*100/all_cross[0])) - dyn_name = {1: '\sum ET', 2:'\sum\sqrt{m^2+pt^2}', 3:'0.5 \sum\sqrt{m^2+pt^2}',4:'\sqrt{\hat s}' } + dyn_name = {1: r'\sum ET', 2:r'\sum\sqrt{m^2+pt^2}', 3:r'0.5 \sum\sqrt{m^2+pt^2}',4:r'\sqrt{\hat s}' } for key, curr in dyns.items(): if key ==-1: continue @@ -789,7 +789,7 @@ def get_id(self): return int(self.start_wgt_id) if 'initrwgt' in self.banner: - pattern = re.compile(' set lhapdf /PATH/TO/lhapdf-config -None does not seem to correspond to a valid lhapdf-config executable. -Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). -Note that you can still compile and run aMC@NLO with the built-in PDFs - MG5_aMC> set lhapdf /PATH/TO/lhapdf-config - Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 set zerowidth_tchannel F set auto_convert_model T save options auto_convert_model -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt import model SMEFTsim_topU3l_MwScheme_UFO -massless_4t INFO: load particles INFO: load vertices @@ -77,7 +72,7 @@ INFO: load vertices DEBUG: MG5 converter defines FFFF26 to Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjP(-5,1)*ProjP(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjP(-5,3)*ProjP(-3,1) + Gamma(-2,-4,-3)*Gamma(-2,2,-6)*Gamma(-1,-6,-5)*Gamma(-1,4,-4)*ProjM(-5,1)*ProjM(-3,3) + Gamma(-2,-4,-3)*Gamma(-2,4,-6)*Gamma(-1,-6,-5)*Gamma(-1,2,-4)*ProjM(-5,3)*ProjM(-3,1)  DEBUG: MG5 converter defines FFFF27 to ProjP(2,1)*ProjP(4,3) + ProjM(2,1)*ProjM(4,3)  DEBUG: MG5 converter defines FFFF112 to ProjM(2,3)*ProjM(4,1) + ProjP(2,3)*ProjP(4,1)  -DEBUG: model prefixing takes 0.1392049789428711  +DEBUG: model prefixing takes 0.14107561111450195  INFO: Change particles name to pass to MG5 convention Defined multiparticle p = g u c d s u~ c~ d~ s~ Defined multiparticle j = g u c d s u~ c~ d~ s~ @@ -92,17 +87,16 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=4: WEIGTHED IS QCD+2*QED+99*SMHLOOP+99*NP+99*NPshifts+99*NPprop+99*NPcpv+NPcbb+NPcbB+NPcbBB+NPcbd1+NPcbd8+NPcbe+NPcbG+NPcbH+NPcbj1+NPcbj8+NPcbl+NPcbu1+NPcbu8+NPcbW+NPcdB+NPcdd1+NPcdd8+NPcdG+NPcdH+NPcdW+NPceB+NPced+NPcee+NPceH+NPceu+NPceW+NPcG+NPcGtil+NPcH+NPcHB+NPcHbox+NPcHbq+NPcHBtil+NPcHd+NPcHDD+NPcHe+NPcHG+NPcHGtil+NPcHj1+NPcHj3+NPcHl1+NPcHl3+NPcHQ1+NPcHQ3+NPcHt+NPcHtb+NPcHu+NPcHud+NPcHW+NPcHWB+NPcHWBtil+NPcHWtil+NPcjd1+NPcjd8+NPcje+NPcjj11+NPcjj18+NPcjj31+NPcjj38+NPcjQbd1+NPcjQbd8+NPcjQtu1+NPcjQtu8+NPcjtQd1+NPcjtQd8+NPcju1+NPcju8+NPcjujd1+NPcjujd11+NPcjujd8+NPcjujd81+NPcjuQb1+NPcjuQb8+NPcld+NPcle+NPclebQ+NPcledj+NPcleju1+NPcleju3+NPcleQt1+NPcleQt3+NPclj1+NPclj3+NPcll+NPcll1+NPclu+NPcQb1+NPcQb8+NPcQd1+NPcQd8+NPcQe+NPcQj11+NPcQj18+NPcQj31+NPcQj38+NPcQl1+NPcQl3+NPcQQ1+NPcQQ8+NPcQt1+NPcQt8+NPcQtjd1+NPcQtjd8+NPcQtQb1+NPcQtQb8+NPcQu1+NPcQu8+NPcQujb1+NPcQujb8+NPctB+NPctb1+NPctb8+NPctd1+NPctd8+NPcte+NPctG+NPctH+NPctj1+NPctj8+NPctl+NPctt+NPctu1+NPctu8+NPctW+NPcuB+NPcud1+NPcud8+NPcuG+NPcuH+NPcutbd1+NPcutbd8+NPcuu1+NPcuu8+NPcuW+NPcW+NPcWtil+NPQjujb8 INFO: Trying process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Process has 72 diagrams -1 processes with 72 diagrams generated in 3.734 s +1 processes with 72 diagrams generated in 3.786 s Total: 1 processes with 72 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt Load PLUGIN.CUDACPP_OUTPUT -Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.5.3_lo_vect. +Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT -DEBUG: cformat =  plugin [export_cpp.py at line 3070]  DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 165]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ t t~ WEIGHTED<=4 @1 INFO: Processing color information for process: g g > t t~ t t~ @1 @@ -111,18 +105,18 @@ INFO: Processing color information for process: g g > t t~ t t~ @1 DEBUG: type(fortran_model)= [output.py at line 214]  DEBUG: type(me)= me=0 [output.py at line 215]  DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 216]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/. 
-Generated helas calls for 1 subprocesses (72 diagrams) in 0.195 s +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/SubProcesses/P1_Sigma_SMEFTsim_topU3l_MwScheme_UFO_gg_ttxttx/. +Generated helas calls for 1 subprocesses (72 diagrams) in 0.191 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV5 routines ALOHA: aloha creates FFV1 routines ALOHA: aloha creates VVVV1 routines ALOHA: aloha creates VVVV9 routines ALOHA: aloha creates VVVV10 routines -ALOHA: aloha creates 5 routines in 0.335 s +ALOHA: aloha creates 5 routines in 0.326 s VVV5 VVV5 FFV1 @@ -132,17 +126,17 @@ ALOHA: aloha creates 5 routines in 0.335 s VVVV1 VVVV9 VVVV10 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/./HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h -INFO: Created file HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/./HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h +INFO: Created file HelAmps_SMEFTsim_topU3l_MwScheme_UFO.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/./Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc INFO: Created files Parameters_SMEFTsim_topU3l_MwScheme_UFO.h and Parameters_SMEFTsim_topU3l_MwScheme_UFO.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_smeft_gg_tttt/src/. 
quit -real 0m5.213s -user 0m5.091s -sys 0m0.068s +real 0m5.220s +user 0m5.109s +sys 0m0.071s Code generation completed in 5 seconds diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt index ef041da63e..5c926cec7c 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt +++ b/epochX/cudacpp/susy_gg_t1t1.mad/CODEGEN_mad_susy_gg_t1t1_log.txt @@ -14,7 +14,7 @@ Running MG5 in debug mode * * * * * * * * * * * * -* VERSION 3.5.3_lo_vect 2023-12-23 * +* VERSION 3.6.0_lo_vect 2024-06-17 * * * * WARNING: UNKNOWN DEVELOPMENT VERSION. * * WARNING: DO NOT USE FOR PRODUCTION * @@ -45,15 +45,10 @@ Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (inclu Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config -None does not seem to correspond to a valid lhapdf-config executable. -Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). -Note that you can still compile and run aMC@NLO with the built-in PDFs - MG5_aMC> set lhapdf /PATH/TO/lhapdf-config - Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -554,24 +549,24 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.132 s +1 processes with 6 diagrams generated in 0.126 s Total: 1 processes with 6 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_t1t1 --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT -Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.5.3_lo_vect. +Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT +DEBUG: opt['output_options']['vector_size'] =  32 [export_v4.py at line 4334]  Output will be done with PLUGIN: CUDACPP_OUTPUT -DEBUG: cformat =  standalone_simd [export_cpp.py at line 3070]  DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 165]  INFO: initialize a new directory: CODEGEN_mad_susy_gg_t1t1 INFO: remove old information in CODEGEN_mad_susy_gg_t1t1 DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1 -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/SubProcesses  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1 +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t1 t1~ @1 @@ -581,18 +576,14 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. -DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [0, 1, 2, 3, 4, 5] [export_cpp.py at line 711]  -DEBUG: subproc_number =  0 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_t1t1x -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/SubProcesses/P1_gg_t1t1x [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  5 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 2, 2: 3, 3: 4, 4: 5, 5: 6} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {2: 1, 3: 2, 4: 3, 5: 4, 6: 5} [model_handling.py at line 1548]  Generated helas calls for 1 subprocesses (6 diagrams) in 0.008 s -Wrote files for 16 helas calls in 0.094 s +Wrote files for 16 helas calls in 0.083 s +DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines @@ -608,39 +599,41 @@ ALOHA: aloha creates 6 routines in 0.185 s VSS1 VSS1 VVSS1 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/./HelAmps_MSSM_SLHA2.h -INFO: Created file HelAmps_MSSM_SLHA2.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/. 
+FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/./HelAmps_MSSM_SLHA2.h +INFO: Created file HelAmps_MSSM_SLHA2.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.cc INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common -patching file Source/genps.inc +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file SubProcesses/makefile -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/SubProcesses/P1_gg_t1t1x; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/SubProcesses/P1_gg_t1t1x; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f -Hunk #2 succeeded at 221 (offset -12 lines). +Hunk #2 succeeded at 215 (offset 1 line). DEBUG: p.returncode =  0 [output.py at line 258]  -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1 done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1 done. 
Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/README +/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/README Run "open index.html" to see more information about this process. quit -real 0m3.457s -user 0m2.741s -sys 0m0.294s -Code generation completed in 4 seconds +real 0m3.008s +user 0m2.691s +sys 0m0.313s +Code generation completed in 3 seconds ************************************************************ * * * W E L C O M E to * @@ -653,7 +646,7 @@ Code generation completed in 4 seconds * * * * * * * * * * * * -* VERSION 3.5.3_lo_vect * +* VERSION 3.6.0_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * @@ -661,9 +654,9 @@ Code generation completed in 4 seconds * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt @@ -683,7 +676,7 @@ launch in debug mode * * * * * * * * * * * * -* VERSION 3.5.3_lo_vect * +* VERSION 3.6.0_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * @@ -691,9 +684,9 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_t1t1/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/me5_configuration.txt b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/me5_configuration.txt index cdeedc7863..4f5079f78a 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/me5_configuration.txt @@ -116,6 +116,7 @@ # cluster_type = condor # cluster_queue = madgraph # cluster_size = 150 +# cluster_walltime = # time in minute for slurm and second for condor (not supported for other scheduller) #! Path to a node directory to avoid direct writing on the central disk #! Note that condor clusters avoid direct writing by default (therefore this @@ -234,7 +235,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/param_card.dat b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/param_card.dat index 16c221de5e..ffa472ed77 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/param_card.dat +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/param_card.dat @@ -1,5 +1,5 @@ ###################################################################### -## PARAM_CARD AUTOMATICALY GENERATED BY MG5 FOLLOWING UFO MODEL #### +## PARAM_CARD AUTOMATICALLY GENERATED BY MG5 FOLLOWING UFO MODEL #### ###################################################################### ## ## ## Width set on Auto will be computed following the information ## diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/param_card_default.dat b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/param_card_default.dat index 16c221de5e..ffa472ed77 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/param_card_default.dat +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/param_card_default.dat @@ -1,5 +1,5 @@ ###################################################################### -## PARAM_CARD AUTOMATICALY GENERATED BY MG5 FOLLOWING UFO MODEL #### +## PARAM_CARD AUTOMATICALLY GENERATED BY MG5 FOLLOWING UFO MODEL #### ###################################################################### ## ## ## Width set on Auto will be computed following the information ## diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/proc_card_mg5.dat index 43d8d77a42..c4c44ef282 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/proc_card_mg5.dat @@ -8,7 +8,7 @@ #* * * * #* * #* * -#* VERSION 3.5.3_lo_vect 2023-12-23 * +#* VERSION 3.6.0_lo_vect 2024-06-17 * #* * #* WARNING: UNKNOWN DEVELOPMENT VERSION. * #* WARNING: DO NOT USE FOR PRODUCTION * diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/reweight_card_default.dat b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/reweight_card_default.dat index ace534ae02..5cc65fe095 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/reweight_card_default.dat +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/reweight_card_default.dat @@ -19,8 +19,16 @@ change mode NLO # Define type of Reweighting. For LO sample this command # has no effect since only "LO" mode is allowed. 
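The template lines added just below document the new Sudakov-reweighting switches. Putting them together, a minimal reweight card that turns Sudakov reweighting on would read as follows (the rwgt_dir path is a placeholder to adapt):

change rwgt_dir /PATH_MG5_BRANCH/NAME_FOLDER
change include_sudakov True
launch
set sminputs 1 130 # modify 1/alpha_EW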
+ +#uncomment if you do not want to overwrite the reweight file of Sudakov in rw_me +#change rwgt_dir /PATH_MG5_BRANCH/NAME_FOLDER + +#uncomment if you want to use Sudakov Reweight +#change include_sudakov True + launch + # SPECIFY A PATH OR USE THE SET COMMAND LIKE THIS: # set sminputs 1 130 # modify 1/alpha_EW diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card.dat b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card.dat index b58554b4f2..6b82577032 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card.dat +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card.dat @@ -157,6 +157,7 @@ systematics = systematics_program ! none, systematics [python], SysCalc [depreceted, C++] ['--mur=0.5,1,2', '--muf=0.5,1,2', '--pdf=errorset'] = systematics_arguments ! see: https://cp3.irmp.ucl.ac.be/projects/madgraph/wiki/Systematics#Systematicspythonmodule + #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card_default.dat b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card_default.dat index 6c2c7854ca..99fa8681a6 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card_default.dat +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Cards/run_card_default.dat @@ -157,6 +157,7 @@ systematics = systematics_program ! none, systematics [python], SysCalc [depreceted, C++] ['--mur=0.5,1,2', '--muf=0.5,1,2', '--pdf=errorset'] = systematics_arguments ! see: https://cp3.irmp.ucl.ac.be/projects/madgraph/wiki/Systematics#Systematicspythonmodule + #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/MGMEVersion.txt b/epochX/cudacpp/susy_gg_t1t1.mad/MGMEVersion.txt index 9d3a5c0ba0..a806d3938c 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/MGMEVersion.txt +++ b/epochX/cudacpp/susy_gg_t1t1.mad/MGMEVersion.txt @@ -1 +1 @@ -3.5.3_lo_vect \ No newline at end of file +3.6.0_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Source/DHELAS/aloha_functions.f b/epochX/cudacpp/susy_gg_t1t1.mad/Source/DHELAS/aloha_functions.f index 975725737f..47699fa614 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Source/DHELAS/aloha_functions.f +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Source/DHELAS/aloha_functions.f @@ -351,7 +351,7 @@ subroutine oxxxxx(p,fmass,nhel,nsf , fo) fo(6) = ip * sqm(abs(im)) else - pp = min(p(0),dsqrt(p(1)**2+p(2)**2+p(3)**2)) +c pp = min(p(0),dsqrt(p(1)**2+p(2)**2+p(3)**2)) sf(1) = dble(1+nsf+(1-nsf)*nh)*rHalf sf(2) = dble(1+nsf-(1-nsf)*nh)*rHalf omega(1) = dsqrt(p(0)+pp) @@ -2054,7 +2054,7 @@ subroutine CombineAmp(nb, ihels, iwfcts, W1, Wall, Amp) enddo return end - + subroutine CombineAmpS(nb, ihels, iwfcts, W1, Wall, Amp) integer nb ! 
size of the vectors diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/couplings.f b/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/couplings.f index f3b620ab58..04d6bb5333 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/couplings.f +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/couplings.f @@ -10,18 +10,27 @@ SUBROUTINE COUP() PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + LOGICAL UPDATELOOP COMMON /TO_UPDATELOOP/UPDATELOOP INCLUDE 'input.inc' - INCLUDE '../vector.inc' + + INCLUDE 'coupl.inc' READLHA = .TRUE. INCLUDE 'intparam_definition.inc' CALL COUP1() + IF (UPDATELOOP) THEN + + CALL COUP2() + + ENDIF + C couplings needed to be evaluated points by points C - CALL COUP2(1) + CALL COUP3(1) RETURN END @@ -47,7 +56,11 @@ SUBROUTINE UPDATE_AS_PARAM(VECID) INCLUDE '../maxparticles.inc' INCLUDE '../cuts.inc' + + INCLUDE '../vector.inc' + + INCLUDE '../run.inc' DOUBLE PRECISION ALPHAS @@ -65,7 +78,7 @@ SUBROUTINE UPDATE_AS_PARAM(VECID) couplings needed to be evaluated points by points C ALL_G(VECID) = G - CALL COUP2(VECID) + CALL COUP3(VECID) RETURN END @@ -80,7 +93,9 @@ SUBROUTINE UPDATE_AS_PARAM2(MU_R2,AS2 ,VECID) INTEGER VECID INCLUDE 'model_functions.inc' INCLUDE 'input.inc' + INCLUDE '../vector.inc' + INCLUDE 'coupl.inc' DOUBLE PRECISION MODEL_SCALE COMMON /MODEL_SCALE/MODEL_SCALE diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/couplings1.f b/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/couplings1.f index e14f3a1770..72cfa0f6e4 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/couplings1.f +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/couplings1.f @@ -7,11 +7,12 @@ SUBROUTINE COUP1( ) IMPLICIT NONE INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' END diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/couplings2.f b/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/couplings2.f index f9cfe802c7..30f3a04e3b 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/couplings2.f +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/couplings2.f @@ -2,23 +2,17 @@ c written by the UFO converter ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc - SUBROUTINE COUP2( VECID) + SUBROUTINE COUP2( ) IMPLICIT NONE - INTEGER VECID + INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' - GC_6(VECID) = -G - GC_55(VECID) = -(MDL_COMPLEXI*G*MDL_I51X33)-MDL_COMPLEXI*G - $ *MDL_I52X33 - GC_57(VECID) = -(MDL_COMPLEXI*G*MDL_I51X36)-MDL_COMPLEXI*G - $ *MDL_I52X36 - GC_90(VECID) = MDL_COMPLEXI*MDL_G__EXP__2*MDL_I74X33 - $ +MDL_COMPLEXI*MDL_G__EXP__2*MDL_I75X33 END diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/couplings3.f b/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/couplings3.f index 4a912d71d3..72efbcef3b 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/couplings3.f +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/couplings3.f @@ -7,12 +7,13 @@ SUBROUTINE COUP3( VECID) IMPLICIT NONE INTEGER VECID INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' GC_6(VECID) = -G GC_55(VECID) = 
-(MDL_COMPLEXI*G*MDL_I51X33)-MDL_COMPLEXI*G diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/makefile b/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/makefile index 0b24445a53..5a5681d220 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/makefile +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/makefile @@ -3,7 +3,7 @@ # Makefile for model library # # ---------------------------------------------------------------------------- - +# template models/template_files/makefile_madevent # Check for ../make_opts ifeq ($(wildcard ../make_opts), ../make_opts) include ../make_opts diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/makeinc.inc b/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/makeinc.inc index 6e2743eac1..4a9e1b2cc5 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/makeinc.inc +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/makeinc.inc @@ -2,4 +2,4 @@ # written by the UFO converter ############################################################################# -MODEL = couplings.o lha_read.o printout.o rw_para.o model_functions.o couplings1.o couplings2.o \ No newline at end of file +MODEL = couplings.o lha_read.o printout.o rw_para.o model_functions.o couplings1.o couplings2.o couplings3.o \ No newline at end of file diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/printout.f b/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/printout.f index 18b8f35b08..3e195b03a2 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/printout.f +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Source/MODEL/printout.f @@ -1,3 +1,7 @@ +ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc +c written by the UFO converter +ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc + c************************************************************************ c** ** c** MadGraph/MadEvent Interface to FeynRules ** @@ -9,7 +13,7 @@ subroutine printout implicit none - include '../vector.inc' ! defines VECSIZE_MEMMAX + include '../vector.inc' ! VECSIZE_MEMMAX (needed by coupl.inc) include 'coupl.inc' ! needs VECSIZE_MEMMAX (defined in vector.inc) include 'input.inc' diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Source/PDF/eepdf.inc b/epochX/cudacpp/susy_gg_t1t1.mad/Source/PDF/eepdf.inc index a0183e49ee..d50d8c62b3 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Source/PDF/eepdf.inc +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Source/PDF/eepdf.inc @@ -4,6 +4,9 @@ integer n_ee parameter (n_ee = 4) ! arrays to store the components before combining them + ! note this common is very quickly overwritten do not use + ! use such common outside of auto_dsig.f double precision ee_components(n_ee) common / to_ee_components / ee_components + diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Source/PDF/pdfwrap_emela.f b/epochX/cudacpp/susy_gg_t1t1.mad/Source/PDF/pdfwrap_emela.f index bce10819d5..da1e4e2276 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Source/PDF/pdfwrap_emela.f +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Source/PDF/pdfwrap_emela.f @@ -13,7 +13,7 @@ SUBROUTINE PDFWRAP DOUBLE PRECISION VALUE(20) REAL*8 ALPHASPDF EXTERNAL ALPHASPDF - ! 
PDFs with beamstrahlung use specific initialisation/evaluation +C PDFs with beamstrahlung use specific initialisation/evaluation LOGICAL HAS_BSTRAHL COMMON /TO_HAS_BS/ HAS_BSTRAHL diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Source/PDF/pdg2pdf.f b/epochX/cudacpp/susy_gg_t1t1.mad/Source/PDF/pdg2pdf.f index 46f321e66b..2e7343a69b 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Source/PDF/pdg2pdf.f +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Source/PDF/pdg2pdf.f @@ -108,7 +108,7 @@ double precision function pdg2pdf(ih,ipdg,beamid,x,xmu) else if (abs(ipart).eq.10) then ipart = sign(1,ipart) * 15 endif - pdg2pdf = 0d0 + pdg2pdf = 0d0 if (beamid.lt.0) then ih_local = ipart @@ -122,7 +122,7 @@ double precision function pdg2pdf(ih,ipdg,beamid,x,xmu) endif do i_ee = 1, n_ee ee_components(i_ee) = compute_eepdf(x,omx_ee(iabs(beamid)),xmu,i_ee,ipart,ih_local) - enddo + enddo pdg2pdf = ee_components(1) ! temporary to test pdf load c write(*,*), x, beamid ,omx_ee(iabs(beamid)),xmu,1,ipart,ih_local,pdg2pdf return diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Source/dsample.f b/epochX/cudacpp/susy_gg_t1t1.mad/Source/dsample.f index 09d482b45a..b0bc0547ad 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Source/dsample.f +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Source/dsample.f @@ -204,6 +204,9 @@ subroutine sample_full(ndim,ncall,itmax,itmin,dsig,ninvar,nconfigs,VECSIZE_USED) if (VECSIZE_USED.le.1) then all_fx(1) = dsig(all_p, all_wgt,0) + ivec=0 + ilock=0 + iwarp=1 else c Here "i" is the position in the full grid of the event do i=(iwarp-1)*WARP_SIZE+1, iwarp*warp_size diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Source/eepdf.inc b/epochX/cudacpp/susy_gg_t1t1.mad/Source/eepdf.inc index a0183e49ee..d50d8c62b3 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Source/eepdf.inc +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Source/eepdf.inc @@ -4,6 +4,9 @@ integer n_ee parameter (n_ee = 4) ! arrays to store the components before combining them + ! note this common is very quickly overwritten do not use + ! use such common outside of auto_dsig.f double precision ee_components(n_ee) common / to_ee_components / ee_components + diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Source/genps.inc b/epochX/cudacpp/susy_gg_t1t1.mad/Source/genps.inc index af7e0efbce..ef78e7e812 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Source/genps.inc +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Source/genps.inc @@ -29,9 +29,8 @@ c parameter (max_host=99,maxplace=199,maxpoints=100,maxans=50) c************************************************************************* c Parameters for helicity sums in matrixN.f c************************************************************************* - REAL*8 LIMHEL -c PARAMETER(LIMHEL=1e-8) ! ME threshold for helicity filtering (Fortran default) - PARAMETER(LIMHEL=0) ! ME threshold for helicity filtering (force Fortran to mimic cudacpp, see #419) +c REAL*8 LIMHEL +c PARAMETER(LIMHEL=1e-8) -> pass in the run_card.dat INTEGER MAXTRIES PARAMETER(MAXTRIES=25) C To pass the helicity configuration chosen by the DiscreteSampler to diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Source/run.inc b/epochX/cudacpp/susy_gg_t1t1.mad/Source/run.inc index 5433a23583..81c8df6bf2 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Source/run.inc +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Source/run.inc @@ -106,4 +106,7 @@ c double precision tmin_for_channel integer sde_strat ! 
1 means standard single diagram enhancement strategy, c 2 means approximation by the denominator of the propagator - common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat \ No newline at end of file + common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat +c + double precision limhel + common/to_limhel/limhel diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Source/run_card.inc b/epochX/cudacpp/susy_gg_t1t1.mad/Source/run_card.inc index 67af0f2051..1a1bc782bd 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Source/run_card.inc +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Source/run_card.inc @@ -88,6 +88,8 @@ DSQRT_SHAT = 0.000000000000000D+00 + LIMHEL = 0.000000000000000D+00 + PTJ = 2.000000000000000D+01 PTB = 0.000000000000000D+00 diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/Source/setrun.f b/epochX/cudacpp/susy_gg_t1t1.mad/Source/setrun.f index 9e9ef7fdbd..dc6caef748 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/Source/setrun.f +++ b/epochX/cudacpp/susy_gg_t1t1.mad/Source/setrun.f @@ -162,9 +162,21 @@ subroutine setrun C Fill common block for Les Houches init info do i=1,2 if(lpp(i).eq.1.or.lpp(i).eq.2) then - idbmup(i)=2212 + if (nb_proton(i).eq.1.and.nb_neutron(i).eq.0) then + idbmup(i)=2212 + elseif (nb_proton(i).eq.0.and.nb_neutron(i).eq.1) then + idbmup(i)=2112 + else + idbmup(i) = 1000000000 + (nb_proton(i)+nb_neutron(i))*10 + $ + nb_proton(i)*10000 + endif elseif(lpp(i).eq.-1.or.lpp(i).eq.-2) then - idbmup(i)=-2212 + if (nb_proton(i).eq.1.and.nb_neutron(i).eq.0) then + idbmup(i)=-2212 + else + idbmup(i) = -1*(1000000000 + (nb_proton(i)+nb_neutron(i))*10 + $ + nb_proton(i)*10000) + endif elseif(lpp(i).eq.3) then idbmup(i)=11 elseif(lpp(i).eq.-3) then diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/MGVersion.txt b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/MGVersion.txt index 9d3a5c0ba0..a806d3938c 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/MGVersion.txt +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/MGVersion.txt @@ -1 +1 @@ -3.5.3_lo_vect \ No newline at end of file +3.6.0_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.cc b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.cc index 328c2f46ce..776b1e73b7 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.cc +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.h b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.h index de00fb127e..26e27d36ee 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.h +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. 
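The setrun.f hunk above generalises the beam PDG id from a hard-coded (anti)proton to light ions, following the PDG 10LZZZAAAI nuclear numbering; the same logic appears in banner.py's get_idbmup later in this diff. A lightly normalised Python transcription (beam_pdg is an illustrative name), with two checks against known nuclear codes:

    import math

    def beam_pdg(lpp, nb_proton=1, nb_neutron=0):
        """PDG id of an incoming hadron beam; ions use 1e9 + 10*A + 10000*Z."""
        if nb_proton == 1 and nb_neutron == 0:
            target = 2212                                    # proton
        elif nb_proton == 0 and nb_neutron == 1:
            target = 2112                                    # neutron
        else:
            a = nb_proton + nb_neutron                       # mass number A
            target = 1000000000 + 10 * a + 10000 * nb_proton
        return int(math.copysign(target, lpp))

    assert beam_pdg(1, 1, 1) == 1000010020                   # deuteron
    assert beam_pdg(-1, 82, 126) == -1000822080              # Pb-208, lpp < 0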
//========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig.f b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig.f index 63acfbe6f9..cfe2f0e220 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig.f +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1134,11 +1134,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=4) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1148,32 +1149,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=4) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=4) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig1.f b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig1.f index a237b1ce1e..9a8c1e29c9 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig1.f +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -101,6 +101,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -217,7 +219,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 
3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -281,7 +283,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -320,9 +322,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -338,6 +341,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -485,11 +490,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -589,9 +589,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -692,3 +694,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/driver.f b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/driver.f index 19c169f025..3fc552a31d 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/driver.f +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/matrix1.f b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/matrix1.f index 3f3f042520..67cebdbb55 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/matrix1.f +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -23,6 +23,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=4) INTEGER NGRAPHS @@ -46,8 +48,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -56,26 +58,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -86,7 +85,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -96,26 +94,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,4) /-1,-1, 0, 0/ DATA (NHEL(I, 2),I=1,4) /-1, 1, 0, 0/ DATA (NHEL(I, 3),I=1,4) / 1,-1, 0, 0/ @@ -131,8 +128,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -141,11 +137,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=2 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=2 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -155,12 +151,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) @@ -172,7 +167,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -201,35 +197,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
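For reference, the selection criterion used in this rewritten block, condensed into Python (update_good_hel is an illustrative name): a helicity is flagged good once its matrix element exceeds the limhel fraction of the summed answer, and the former per-mirror bookkeeping collapses to process index 1:

    def update_good_hel(ts, ans, limhel, goodhel):
        """ts: per-helicity matrix elements; goodhel: mutable flag list."""
        ncomb = len(ts)
        added = []
        for i, t in enumerate(ts):
            if not goodhel[i] and abs(t) > ans * limhel / ncomb:
                goodhel[i] = True
                added.append(i)
        return added

    goodhel = [False] * 4
    update_good_hel([0.0, 1.2e-3, 0.0, 4.5e-3], 5.7e-3, 1e-8, goodhel)
    # returns [1, 3]; with limhel = 0 every non-vanishing helicity is kept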
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -301,7 +295,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cluster.f b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cluster.f index 649e46f4e9..b8995283ed 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cluster.f +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/cluster.f @@ -552,6 +552,8 @@ logical function cluster(p, ivec) if (btest(mlevel,1)) $ write (*,*)'New event' + iwin = 0 + jwin = 0 cluster=.false. clustered=.false. do i=0,3 @@ -663,7 +665,8 @@ logical function cluster(p, ivec) c initialize graph storage igraphs(0)=0 nleft=nexternal -c cluster +c cluster + if (iwin.eq.0.or.jwin.eq.0) stop 21 do n=1,nexternal-2 c combine winner imocl(n)=imap(iwin,2)+imap(jwin,2) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/proc_characteristics b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/proc_characteristics index b76b38b49f..9c57d84817 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/proc_characteristics +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/proc_characteristics @@ -17,6 +17,8 @@ splitting_types = [] perturbation_order = [] limitations = [] + ew_sudakov = False hel_recycling = False single_color = True nlo_mixed_expansion = True + gauge = unitary diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/refine.sh b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/refine.sh index afb9b99ad1..b46170ba23 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/refine.sh +++ b/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/refine.sh @@ -57,7 +57,11 @@ j=%(directory)s for((try=1;try<=16;try+=1)); do if [ "$keeplog" = true ] ; then + if [[ -e ../madevent ]];then ../madevent 2>&1 >> $k &1 >> $k &1 >> log.txt &1 >> log.txt &1 >> $k \w*)>') - pat_end=re.compile('\w*)>') + pat_begin=re.compile(r'<(?P\w*)>') + pat_end=re.compile(r'\w*)>') tag_to_file={'slha':'param_card.dat', 'mgruncard':'run_card.dat', @@ -319,7 +319,7 @@ def check_pid(self, pid2label): def get_lha_strategy(self): """get the lha_strategy: how the weight have to be handle by the shower""" - if not self["init"]: + if "init" not in self or not self["init"]: raise Exception("No init block define") data = self["init"].split('\n')[0].split() @@ -537,7 +537,8 @@ def charge_card(self, tag): self.param_card = param_card_reader.ParamCard(param_card) return self.param_card elif tag == 'mgruncard': - self.run_card = RunCard(self[tag], unknown_warning=False) + with misc.TMP_variable(RunCard, 'allow_scan', True): + self.run_card = RunCard(self[tag], 
consistency=False, unknow_warning=False) return self.run_card elif tag == 'mg5proccard': proc_card = self[tag].split('\n') @@ -976,6 +977,8 @@ class ConfigFile(dict): """ a class for storing/dealing with input file. """ + allow_scan = False + def __init__(self, finput=None, **opt): """initialize a new instance. input can be an instance of MadLoopParam, a file, a path to a file, or simply Nothing""" @@ -993,6 +996,7 @@ def __init__(self, finput=None, **opt): # Initialize it with all the default value self.user_set = set() self.auto_set = set() + self.scan_set = {} #key -> type of list (int/float/bool/str/... for scan self.system_only = set() self.lower_to_case = {} self.list_parameter = {} #key -> type of list (int/float/bool/str/... @@ -1109,6 +1113,8 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): #1. check if the parameter is set to auto -> pass it to special if lower_name in self: targettype = type(dict.__getitem__(self, lower_name)) + if lower_name in self.scan_set: + targettype = self.scan_set[lower_name] if targettype != str and isinstance(value, str) and value.lower() == 'auto': self.auto_set.add(lower_name) if lower_name in self.user_set: @@ -1118,13 +1124,26 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): return elif lower_name in self.auto_set: self.auto_set.remove(lower_name) - + + + #1. check if the parameter is set to auto -> pass it to special + scan_targettype = None + if self.allow_scan and isinstance(value, str) and value.strip().startswith('scan'): + if lower_name in self.user_set: + self.user_set.remove(lower_name) + self.scan_set[lower_name] = type(self[lower_name]) + dict.__setitem__(self, lower_name, value) + #keep old value. + self.post_set(lower_name,value, change_userdefine, raiseerror) + return + elif lower_name in self.scan_set: + scan_targettype = self.scan_set[lower_name] + del self.scan_set[lower_name] + # 2. Find the type of the attribute that we want if lower_name in self.list_parameter: targettype = self.list_parameter[lower_name] - - if isinstance(value, str): # split for each comma/space value = value.strip() @@ -1248,8 +1267,11 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): if change_userdefine: self.user_set.add(lower_name) return self.post_set(lower_name, None, change_userdefine, raiseerror) - elif name in self: - targettype = type(self[name]) + elif name in self: + if scan_targettype: + targettype = targettype + else: + targettype = type(self[name]) else: logger.debug('Trying to add argument %s in %s. ' % (name, self.__class__.__name__) +\ 'This argument is not defined by default. 
Please consider adding it.') @@ -1262,7 +1284,7 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): if change_userdefine: self.user_set.add(lower_name) return self.post_set(lower_name, None, change_userdefine, raiseerror) - + value = self.format_variable(value, targettype, name=name) #check that the value is allowed: if lower_name in self.allowed_value and '*' not in self.allowed_value[lower_name]: @@ -1444,7 +1466,7 @@ def format_variable(value, targettype, name="unknown"): value =int(value[:-1]) * convert[value[-1]] elif '/' in value or '*' in value: try: - split = re.split('(\*|/)',value) + split = re.split(r'(\*|/)',value) v = float(split[0]) for i in range((len(split)//2)): if split[2*i+1] == '*': @@ -1482,7 +1504,7 @@ def format_variable(value, targettype, name="unknown"): value = float(value) except ValueError: try: - split = re.split('(\*|/)',value) + split = re.split(r'(\*|/)',value) v = float(split[0]) for i in range((len(split)//2)): if split[2*i+1] == '*': @@ -1491,7 +1513,10 @@ def format_variable(value, targettype, name="unknown"): v /= float(split[2*i+2]) except: v=0 - raise InvalidCmd("%s can not be mapped to a float" % value) + if "scan" in value: + raise InvalidCmd("%s is not supported here. Note that scan command can not be present simultaneously in the run_card and param_card." % value) + else: + raise InvalidCmd("%s can not be mapped to a float" % value) finally: value = v else: @@ -1737,10 +1762,12 @@ def default_setup(self): self.add_param('splitting_types',[], typelist=str) self.add_param('perturbation_order', [], typelist=str) self.add_param('limitations', [], typelist=str) + self.add_param('ew_sudakov', False) self.add_param('hel_recycling', False) self.add_param('single_color', True) self.add_param('nlo_mixed_expansion', True) - + self.add_param('gauge', 'U') + def read(self, finput): """Read the input file, this can be a path to a file, a file object, a str with the content of the file.""" @@ -3104,7 +3131,7 @@ def write(self, output_file, template=None, python_template=False, # do not write hidden parameter not hidden for this template # if python_template: - written = written.union(set(re.findall('\%\((\w*)\)s', open(template,'r').read(), re.M))) + written = written.union(set(re.findall(r'\%\((\w*)\)s', open(template,'r').read(), re.M))) to_write = to_write.union(set(self.hidden_param)) to_write = to_write.difference(written) @@ -3157,7 +3184,6 @@ def get_value_from_include(self, path, list_of_params, output_dir): if path does not exists return the current value in self for all parameter""" #WARNING DOES NOT HANDLE LIST/DICT so far - misc.sprint(output_dir, path) # handle case where file is missing if not os.path.exists(pjoin(output_dir,path)): misc.sprint("include file not existing", pjoin(output_dir,path)) @@ -3259,7 +3285,7 @@ def edit_dummy_fct_from_file(self, filelist, outdir): text = open(path,'r').read() #misc.sprint(text) f77_type = ['real*8', 'integer', 'double precision', 'logical'] - pattern = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ + pattern = re.compile(r'^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ % {'type':'|'.join(f77_type)}, re.I+re.M) for fct in pattern.findall(text): fsock = file_writers.FortranWriter(tmp,'w') @@ -3287,6 +3313,7 @@ def edit_dummy_fct_from_file(self, filelist, outdir): starttext = open(pjoin(outdir, path+'.orig')).read() fsock.remove_routine(starttext, to_mod[path][0]) for text in to_mod[path][1]: + text = self.retro_compatible_custom_fct(text) 
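The __setitem__ changes above teach ConfigFile to hold a run_card entry in its raw 'scan...' form: the eventual target type is parked in scan_set and restored on the next concrete assignment. A minimal self-contained sketch of that behaviour (ScanAwareDict is a toy stand-in, not the real class):

    class ScanAwareDict(dict):
        """Toy stand-in for ConfigFile with the new scan bookkeeping."""
        allow_scan = True

        def __init__(self):
            super().__init__()
            self.scan_set = {}   # name -> type to restore after the scan

        def set(self, name, value):
            name = name.lower()
            if self.allow_scan and isinstance(value, str) \
                    and value.strip().startswith('scan'):
                self.scan_set[name] = type(self.get(name, 0.))
                dict.__setitem__(self, name, value)   # keep raw scan string
                return
            target = self.scan_set.pop(name, None) or type(self.get(name, value))
            dict.__setitem__(self, name, target(value))

    cfg = ScanAwareDict()
    cfg.set('ebeam1', 6500.)
    cfg.set('ebeam1', 'scan:[3500., 4000., 6500.]')  # stored verbatim
    cfg.set('ebeam1', '7000')                        # back to float: 7000.0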
fsock.writelines(text) fsock.close() if not filecmp.cmp(pjoin(outdir, path), pjoin(outdir, path+'.tmp')): @@ -3303,7 +3330,33 @@ def edit_dummy_fct_from_file(self, filelist, outdir): files.mv(pjoin(outdir,path+'.orig'), pjoin(outdir, path)) + @staticmethod + def retro_compatible_custom_fct(lines, mode=None): + f77_type = ['real*8', 'integer', 'double precision', 'logical'] + function_pat = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ + % {'type':'|'.join(f77_type)}, re.I+re.M) + include_pat = re.compile(r"\s+include\s+[\'\"]([\w\./]*)") + + assert isinstance(lines, list) + sol = [] + + if mode is None or 'vector.inc' in mode: + search = True + for i,line in enumerate(lines[:]): + if search and re.search(include_pat, line): + name = re.findall(include_pat, line)[0] + misc.sprint('DETECTED INCLUDE', name) + if 'vector.inc' in name: + search = False + if 'run.inc' in name: + sol.append(" include 'vector.inc'") + search = False + sol.append(line) + if re.search(function_pat, line): + misc.sprint("DETECTED FCT") + search = True + return sol def guess_entry_fromname(self, name, value): """ @@ -3346,7 +3399,7 @@ def update_typelist(value, name, opts): #handle metadata opts = {} forced_opts = [] - for key,val in re.findall("\<(?P[_\-\w]+)\=(?P[^>]*)\>", str(name)): + for key,val in re.findall(r"\<(?P[_\-\w]+)\=(?P[^>]*)\>", str(name)): forced_opts.append(key) if val in ['True', 'False']: opts[key] = eval(val) @@ -3681,11 +3734,22 @@ def write_autodef(self, output_dir, output_file=None): out = ["%s\n" %l for l in out] fsock.writelines(out) - @staticmethod - def get_idbmup(lpp): + def get_idbmup(self, lpp, beam=1): """return the particle colliding pdg code""" if lpp in (1,2, -1,-2): - return math.copysign(2212, lpp) + target = 2212 + if 'nb_proton1' in self: + nbp = self['nb_proton%s' % beam] + nbn = self['nb_neutron%s' % beam] + if nbp == 1 and nbn ==0: + target = 2212 + elif nbp==0 and nbn ==1: + target = 2112 + else: + target = 1000000000 + target += 10 * (nbp+nbn) + target += 10000 * nbp + return math.copysign(target, lpp) elif lpp in (3,-3): return math.copysign(11, lpp) elif lpp in (4,-4): @@ -3701,8 +3765,8 @@ def get_banner_init_information(self): the first line of the block of the lhe file.""" output = {} - output["idbmup1"] = self.get_idbmup(self['lpp1']) - output["idbmup2"] = self.get_idbmup(self['lpp2']) + output["idbmup1"] = self.get_idbmup(self['lpp1'], beam=1) + output["idbmup2"] = self.get_idbmup(self['lpp2'], beam=2) output["ebmup1"] = self["ebeam1"] output["ebmup2"] = self["ebeam2"] output["pdfgup1"] = 0 @@ -3959,7 +4023,8 @@ def check_validity(self, card): dict.__setitem__(card, 'pdlabel1', card['pdlabel']) dict.__setitem__(card, 'pdlabel2', card['pdlabel']) - if abs(card['lpp1']) == 1 == abs(card['lpp2']) and card['pdlabel1'] != card['pdlabel2']: + if isinstance(card['lpp1'],int) and isinstance(card['lpp2'],int) and \ + abs(card['lpp1']) == 1 == abs(card['lpp2']) and card['pdlabel1'] != card['pdlabel2']: raise InvalidRunCard("Assymetric beam pdf not supported for proton-proton collision") def status(self, card): @@ -4156,12 +4221,16 @@ def default_setup(self): self.add_param('frame_id', 6, system=True) self.add_param("event_norm", "average", allowed=['sum','average', 'unity'], include=False, sys_default='sum', hidden=True) + self.add_param("keep_log", "normal", include=False, hidden=True, + comment="none: all log send to /dev/null.\n minimal: keep only log for survey of the last run.\n normal: keep only log for survey of all run. 
\n debug: keep all log (survey and refine)", + allowed=['none', 'minimal', 'normal', 'debug']) #cut self.add_param("auto_ptj_mjj", True, hidden=True) self.add_param("bwcutoff", 15.0) self.add_param("cut_decays", False, cut='d') self.add_param('dsqrt_shat',0., cut=True) self.add_param("nhel", 0, include=False) + self.add_param("limhel", 1e-8, hidden=True, comment="threshold to determine if an helicity contributes when not MC over helicity.") #pt cut self.add_param("ptj", 20.0, cut='j') self.add_param("ptb", 0.0, cut='b') @@ -4842,7 +4911,7 @@ def create_default_for_process(self, proc_characteristic, history, proc_def): # here pick strategy 2 if only one QCD color flow # and for pure multi-jet case jet_id = [21] + list(range(1, self['maxjetflavor']+1)) - if proc_characteristic['single_color']: + if proc_characteristic['gauge'] != 'FD' and proc_characteristic['single_color']: self['sde_strategy'] = 2 #for pure lepton final state go back to sde_strategy=1 pure_lepton=True @@ -5741,9 +5810,10 @@ def check_validity(self): # check that ebeam is bigger than the proton mass. for i in [1,2]: - if self['lpp%s' % i ] not in [1,2]: + # do not for proton mass if not proton PDF (or when scan initialization) + if self['lpp%s' % i ] not in [1,2] or isinstance(self['ebeam%i' % i], str): continue - + if self['ebeam%i' % i] < 0.938: if self['ebeam%i' %i] == 0: logger.warning("At-rest proton mode set: energy beam set to 0.938 GeV") @@ -6193,3 +6263,181 @@ def log_and_update(self, banner, card, par, v): xcard = banner.charge_card(card) xcard[par[0]].param_dict[(par[1],)].value = v xcard.write(os.path.join(self.me_dir, 'Cards', '%s.dat' % card)) + + + + +class RunCardIterator(object): + """A class keeping track of the scan: flag in the param_card and + having an __iter__() function to scan over all the points of the scan. + """ + + logging = True + def __init__(self, input_path=None): + with misc.TMP_variable(RunCard, 'allow_scan', True): + self.run_card = RunCard(input_path, consistency=False) + self.run_card.allow_scan = True + + self.itertag = [] #all the current value use + self.cross = [] # keep track of all the cross-section computed + self.param_order = [] + + def __iter__(self): + """generate the next param_card (in a abstract way) related to the scan. + Technically this generates only the generator.""" + + if hasattr(self, 'iterator'): + return self.iterator + self.iterator = self.iterate() + return self.iterator + + def write(self, path): + self.__iter__.write(path) + + def next(self, autostart=False): + """call the next iteration value""" + try: + iterator = self.iterator + except: + if autostart: + iterator = self.__iter__() + else: + raise + try: + out = next(iterator) + except StopIteration: + del self.iterator + raise + return out + + def iterate(self): + """create the actual generator""" + all_iterators = {} # dictionary of key -> block of object to scan [([param, [values]), ...] 
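For orientation, the scan grammar implemented by the remainder of iterate() just below: plain 'scan:[...]' entries vary independently, entries sharing an index such as 'scan1:[...]' advance in lockstep, and the iterator walks the Cartesian product of the groups. A compact sketch under those assumptions (expand_scans is illustrative, not the plugin API):

    import itertools
    import re

    PATTERN = re.compile(r'scan\s*(?P<id>\d*)\s*:\s*(?P<value>[^#]*)', re.I)

    def expand_scans(card):
        """card: name -> raw value; yields one dict per scan point."""
        groups, fixed = {}, {}
        for name, raw in card.items():
            m = PATTERN.match(str(raw).strip())
            if not m:
                fixed[name] = raw
                continue
            key = m.group('id') or -len(groups) - 1   # unnamed scans stay independent
            groups.setdefault(key, []).append((name, eval(m.group('value'))))
        keys = list(groups)
        lengths = [range(len(groups[k][0][1])) for k in keys]
        for pos in itertools.product(*lengths):
            point = dict(fixed)
            for k, p in zip(keys, pos):
                for name, values in groups[k]:
                    point[name] = values[p]
            yield point

    for point in expand_scans({'nevents': 1000,
                               'ebeam1': 'scan1: [3500., 6500.]',
                               'ebeam2': 'scan1: [3500., 6500.]'}):
        print(point)   # two points, ebeam1 and ebeam2 moving together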
+ pattern = re.compile(r'''scan\s*(?P\d*)\s*:\s*(?P[^#]*)''', re.I) + + # fill all_iterators with the run_card information + for name in self.run_card.scan_set: + value = self.run_card[name] + try: + key, def_list = pattern.findall(value)[0] + except Exception as error: + misc.sprint(error) + raise Exception("Fail to handle scanning tag in run_card: Please check that the syntax is valid") + if key == '': + key = -1 * len(all_iterators) + if key not in all_iterators: + all_iterators[key] = [] + try: + all_iterators[key].append( (name, eval(def_list))) + except SyntaxError as error: + raise Exception("Fail to handle your scan definition. Please check your syntax:\n entry: %s \n Error reported: %s" %(def_list, error)) + + #prepare to keep track of parameter changing for the report + keys = list(all_iterators.keys()) # need to fix an order for the scan + #store the type of parameter + for key in keys: + for param, values in all_iterators[key]: + self.param_order.append("run_card#%s" % (param)) + + # do the loop + lengths = [list(range(len(all_iterators[key][0][1]))) for key in keys] + from functools import reduce + total = reduce((lambda x, y: x * y),[len(x) for x in lengths]) + for i,positions in enumerate(itertools.product(*lengths)): + self.itertag = [] + if self.logging: + logger.info("Create the next run_card in the scan definition (%s/%s) " %( i+1, total), '$MG:BOLD') + for i, pos in enumerate(positions): + key = keys[i] + for param, values in all_iterators[key]: + # assign the value in the card. + self.run_card[param] = values[pos] + self.itertag.append(values[pos]) + if self.logging: + logger.info("change parameter %s to %s", \ + param, values[pos]) + + + # retrun the current param_card up to next iteration + yield self.run_card + + + def store_entry(self, run_name, cross, error=None, run_card_path=None): + """store the value of the cross-section""" + + if isinstance(cross, dict): + info = dict(cross) + info.update({'bench' : self.itertag, 'run_name': run_name}) + self.cross.append(info) + else: + if error is None: + self.cross.append({'bench' : self.itertag, 'run_name': run_name, 'cross(pb)':cross}) + else: + self.cross.append({'bench' : self.itertag, 'run_name': run_name, 'cross(pb)':cross, 'error(pb)':error}) + + + def write_summary(self, path, order=None, lastline=False, nbcol=20): + """ """ + + if path: + ff = open(path, 'w') + path_events = path.rsplit("/", 1)[0] + #identCard = open(pjoin(path.rsplit("/", 2)[0], "Cards", "ident_card.dat")) + #identLines = identCard.readlines() + #identCard.close() + else: + ff = StringIO.StringIO() + if order: + keys = order + else: + keys = list(self.cross[0].keys()) + if 'bench' in keys: keys.remove('bench') + if 'run_name' in keys: keys.remove('run_name') + keys.sort() + if 'cross(pb)' in keys: + keys.remove('cross(pb)') + keys.append('cross(pb)') + if 'error(pb)' in keys: + keys.remove('error(pb)') + keys.append('error(pb)') + + formatting = "#%s%s%s\n" %('%%-%is ' % (nbcol-1), ('%%-%is ' % (nbcol))* len(self.param_order), + ('%%-%is ' % (nbcol))* len(keys)) + # header + if not lastline: + ff.write(formatting % tuple(['run_name'] + self.param_order + keys)) + formatting = "%s%s%s\n" %('%%-%is ' % (nbcol), ('%%-%ie ' % (nbcol))* len(self.param_order), + ('%%-%ie ' % (nbcol))* len(keys)) + + if not lastline: + to_print = self.cross + else: + to_print = self.cross[-1:] + for info in to_print: + name = info['run_name'] + bench = info['bench'] + data = [] + for k in keys: + if k in info: + data.append(info[k]) + else: + data.append(0.) 
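store_entry above accumulates one dict per scan point, which write_summary then renders as fixed-width rows plus a per-run params.dat. A brief usage illustration of that bookkeeping, assuming the dict layout shown in the hunk:

    cross = []

    def store_entry(itertag, run_name, xsec, error=None):
        """Record one scan point (cf. RunCardIterator.store_entry)."""
        info = {'bench': itertag, 'run_name': run_name, 'cross(pb)': xsec}
        if error is not None:
            info['error(pb)'] = error
        cross.append(info)

    store_entry([3500.0], 'run_01', 1.234e-2, 5.6e-5)
    store_entry([6500.0], 'run_02', 2.345e-2, 7.8e-5)
    # write_summary emits one row per entry: run_name, the scanned values,
    # then cross(pb) and error(pb) as the trailing columns.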
+ ff.write(formatting % tuple([name] + bench + data)) + ff_single = open(pjoin(path_events, name, "params.dat"), "w") + for i_bench in range(0, len(bench)): + ff_single.write(self.param_order[i_bench] + " = " + str(bench[i_bench]) +"\n") + ff_single.close() + + if not path: + return ff.getvalue() + + + def get_next_name(self, run_name): + """returns a smart name for the next run""" + + if '_' in run_name: + name, value = run_name.rsplit('_',1) + if value.isdigit(): + return '%s_%02i' % (name, float(value)+1) + # no valid '_' in the name + return '%s_scan_02' % run_name diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/check_param_card.py b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/check_param_card.py index 71089d7480..bc785b5de6 100755 --- a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/check_param_card.py +++ b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/check_param_card.py @@ -649,7 +649,7 @@ def write_inc_file(self, outpath, identpath, default, need_mp=False): #check if we need to write the value of scale for some block if os.path.exists(input_inc): text = open(input_inc).read() - scales = list(set(re.findall('mdl__(\w*)__scale', text, re.I))) + scales = list(set(re.findall(r'mdl__(\w*)__scale', text, re.I))) else: scales = [] @@ -1000,10 +1000,12 @@ def iterate(self): self.param_order.append("%s#%s" % (param.lhablock, '_'.join(repr(i) for i in param.lhacode))) # do the loop lengths = [list(range(len(all_iterators[key][0][1]))) for key in keys] - for positions in itertools.product(*lengths): + from functools import reduce + total = reduce((lambda x, y: x * y),[len(x) for x in lengths]) + for i,positions in enumerate(itertools.product(*lengths)): self.itertag = [] if self.logging: - logger.info("Create the next param_card in the scan definition", '$MG:BOLD') + logger.info("Create the next param_card in the scan definition (%s/%s)" % (i+1,total), '$MG:BOLD') for i, pos in enumerate(positions): key = keys[i] for param, values in all_iterators[key]: diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/cluster.py b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/cluster.py index 9a893f630d..1ad860e04f 100755 --- a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/cluster.py +++ b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/cluster.py @@ -646,7 +646,10 @@ def worker(self): if os.path.exists(exe) and not exe.startswith('/'): exe = './' + exe if isinstance(opt['stdout'],str): - opt['stdout'] = open(opt['stdout'],'w') + if opt['stdout'] == '/dev/null': + opt['stdout'] = os.open(os.devnull, os.O_RDWR) + else: + opt['stdout'] = open(opt['stdout'],'w') if opt['stderr'] == None: opt['stderr'] = subprocess.STDOUT if arg: @@ -671,11 +674,12 @@ def worker(self): self.pids.put(pid) # the function should return 0 if everything is fine # the error message otherwise - returncode = exe(*arg, **opt) - if returncode != 0: - logger.warning("fct %s does not return 0. Stopping the code in a clean way. The error was:\n%s", exe, returncode) + try: + returncode = exe(*arg, **opt) + except Exception as error: + #logger.warning("fct %s does not return 0. Stopping the code in a clean way. 
The error was:\n%s", exe, returncode) self.stoprequest.set() - self.remove("fct %s does not return 0:\n %s" % (exe, returncode)) + self.remove("fct %s does raise %s\n %s" % (exe, error)) except Exception as error: self.fail_msg = sys.exc_info() logger.warning(str(error)) @@ -700,7 +704,7 @@ def worker(self): def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, - log=None, required_output=[], nb_submit=0): + log=None, required_output=[], nb_submit=0, python_opts={}): """submit a job on multicore machine""" # open threads if needed @@ -720,7 +724,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, return tag else: # python function - self.queue.put((tag, prog, argument, {})) + self.queue.put((tag, prog, argument, python_opts)) self.submitted.put(1) return tag @@ -908,6 +912,10 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None else: requirement = '' + if 'cluster_walltime' in self.options and self.options['cluster_walltime']\ + and self.options['cluster_walltime'] != 'None': + requirement+='\n MaxRuntime = %s' % self.options['cluster_walltime'] + if cwd is None: cwd = os.getcwd() if stdout is None: @@ -936,7 +944,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None #Submitting job(s). #Logging submit event(s). #1 job(s) submitted to cluster 2253622. - pat = re.compile("submitted to cluster (\d*)",re.MULTILINE) + pat = re.compile(r"submitted to cluster (\d*)",re.MULTILINE) output = output.decode(errors='ignore') try: id = pat.search(output).groups()[0] @@ -1025,7 +1033,7 @@ def submit2(self, prog, argument=[], cwd=None, stdout=None, stderr=None, #Logging submit event(s). #1 job(s) submitted to cluster 2253622. output = output.decode(errors='ignore') - pat = re.compile("submitted to cluster (\d*)",re.MULTILINE) + pat = re.compile(r"submitted to cluster (\d*)",re.MULTILINE) try: id = pat.search(output).groups()[0] except: @@ -1588,7 +1596,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None output = a.communicate()[0].decode(errors='ignore') #Your job 874511 ("test.sh") has been submitted - pat = re.compile("Your job (\d*) \(",re.MULTILINE) + pat = re.compile(r"Your job (\d*) \(",re.MULTILINE) try: id = pat.search(output).groups()[0] except: @@ -1606,7 +1614,7 @@ def control_one_job(self, id): if not status: return 'F' #874516 0.00000 test.sh alwall qw 03/04/2012 22:30:35 1 - pat = re.compile("^(\d+)\s+[\d\.]+\s+[\w\d\.]+\s+[\w\d\.]+\s+(\w+)\s") + pat = re.compile(r"^(\d+)\s+[\d\.]+\s+[\w\d\.]+\s+[\w\d\.]+\s+(\w+)\s") stat = '' for line in status.stdout.read().decode(errors='ignore').split('\n'): if not line: @@ -1636,7 +1644,7 @@ def control(self, me_dir=None): cmd = 'qstat -s %s' % statusflag status = misc.Popen([cmd], shell=True, stdout=subprocess.PIPE) #874516 0.00000 test.sh alwall qw 03/04/2012 22:30:35 1 - pat = re.compile("^(\d+)") + pat = re.compile(r"^(\d+)") for line in status.stdout.read().decode(errors='ignore').split('\n'): line = line.strip() try: @@ -1715,6 +1723,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None stderr = stdout if log is None: log = '/dev/null' + command = ['sbatch', '-o', stdout, '-J', me_dir, @@ -1726,6 +1735,12 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None command.insert(1, '-p') command.insert(2, self.cluster_queue) + if 'cluster_walltime' in self.options and self.options['cluster_walltime']\ + and self.options['cluster_walltime'] != 'None': + 
command.insert(1, '-t') + command.insert(2, self.options['cluster_walltime']) + + a = misc.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, @@ -1736,7 +1751,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None id = output_arr[3].rstrip() if not id.isdigit(): - id = re.findall('Submitted batch job ([\d\.]+)', ' '.join(output_arr)) + id = re.findall(r'Submitted batch job ([\d\.]+)', ' '.join(output_arr)) if not id or len(id)>1: raise ClusterManagmentError( 'fail to submit to the cluster: \n%s' \ diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/combine_runs.py b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/combine_runs.py index 4de6b84ec0..b1e8c88eac 100755 --- a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/combine_runs.py +++ b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/combine_runs.py @@ -20,6 +20,7 @@ from __future__ import absolute_import import math import os +import shutil import re import logging from six.moves import range @@ -117,6 +118,7 @@ def sum_multichannel(self, channel): #Now read in all of the events and write them #back out with the appropriate scaled weight + to_clean = [] fsock = open(pjoin(channel, 'events.lhe'), 'w') wgt = results.axsec / results.nunwgt tot_nevents, nb_file = 0, 0 @@ -129,8 +131,14 @@ def sum_multichannel(self, channel): nw = self.copy_events(fsock, pjoin(path,'events.lhe'), wgt) tot_nevents += nw nb_file += 1 + to_clean.append(path) logger.debug("Combined %s file generating %s events for %s " , nb_file, tot_nevents, channel) - + for path in to_clean: + try: + shutil.rmtree(path) + except Exception as error: + pass + @staticmethod def get_fortran_str(nb): data = '%E' % nb @@ -162,6 +170,7 @@ def copy_events(self, fsock, input, new_wgt): fsock.write(line) old_line = line return nb_evt + def get_channels(self, proc_path): """Opens file symfact.dat to determine all channels""" sympath = os.path.join(proc_path, 'symfact.dat') diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/common_run_interface.py index 9bd9d9cb50..194f0cdfbd 100755 --- a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/common_run_interface.py @@ -23,6 +23,7 @@ import ast import logging import math +import copy import os import re import shutil @@ -181,6 +182,23 @@ def help_add_time_of_flight(self): logger.info(' threshold option allows to change the minimal value required to') logger.info(' a non zero value for the particle (default:1e-12s)') + def help_print_results(self): + logger.info("syntax: print_results [RUN_NAME] [OPTIONS]") + logger.info("-- print the results of the previous run on the screen") + logger.info(" If not RUN_NAME is provided, the information of all run") + logger.info(" are printed one after another.") + logger.info("") + logger.info(" supported options:") + logger.info(" ------------------") + logger.info(" --format=full|short # default is full") + logger.info(" full format contains banner/... 
") + logger.info(" while short is a simple multi-column format (nice for plotting)") + logger.info(" --path=") + logger.info(" allow to write the information to a file.") + logger.info(" --mode=w|a #default is w ") + logger.info(" when the information is printed to a file, you can choose ") + logger.info(" to either overwrite the file if already exists (w mode)") + logger.info(" to append the information at the end of the file (a mode)") class CheckValidForCmd(object): @@ -727,7 +745,7 @@ def __init__(self, me_dir, options, *args, **opts): if not self.proc_characteristics['ninitial']: # Get number of initial states nexternal = open(pjoin(self.me_dir,'Source','nexternal.inc')).read() - found = re.search("PARAMETER\s*\(NINCOMING=(\d)\)", nexternal) + found = re.search(r"PARAMETER\s*\(NINCOMING=(\d)\)", nexternal) self.ninitial = int(found.group(1)) else: self.ninitial = self.proc_characteristics['ninitial'] @@ -989,7 +1007,22 @@ def do_treatcards(self, line, amcatnlo=False): #raise Exception, "%s %s %s" % (sys.path, os.path.exists(pjoin(self.me_dir,'bin','internal', 'ufomodel')), os.listdir(pjoin(self.me_dir,'bin','internal', 'ufomodel'))) import ufomodel as ufomodel zero = ufomodel.parameters.ZERO - if self.proc_characteristics['nlo_mixed_expansion']: + no_width = [] + + if self.proc_characteristics['ew_sudakov']: + # if the sudakov approximation is used, force all particle widths to zero + # unless the complex mass scheme is used + if not self.proc_characteristics['complex_mass_scheme']: + no_width = [p for p in ufomodel.all_particles if p.width != zero] + logger.info('''Setting all particle widths to zero (needed for EW Sudakov approximation).''','$MG:BOLD') + # also, check that the model features the 'ntadpole' parameter, and set it to 1 + try: + param_card['tadpole'].get(1).value = 1. + logger.info('''Setting the value of ntadpole to 1 (needed for EW Sudakov approximation).''','$MG:BOLD') + except KeyError: + logger.warning('''The model has no 'ntadpole' parameter. 
The Sudakov approximation for EW corrections may give wrong results.''') + + elif self.proc_characteristics['nlo_mixed_expansion']: no_width = [p for p in ufomodel.all_particles if (str(p.pdg_code) in pids or str(-p.pdg_code) in pids) and p.width != zero] @@ -1168,17 +1201,17 @@ def detect_card_type(path): 'Begin Minpts', 'gridpack', 'ebeam1', - 'block\s+mw_run', + r'block\s+mw_run', 'BLOCK', 'DECAY', 'launch', 'madspin', - 'transfer_card\.dat', + r'transfer_card\.dat', 'set', 'main:numberofevents', # pythia8, '@MG5aMC skip_analysis', #MA5 --both-- - '@MG5aMC\s*inputs\s*=\s*\*\.(?:hepmc|lhe)', #MA5 --both-- - '@MG5aMC\s*reconstruction_name', # MA5 hadronique + r'@MG5aMC\s*inputs\s*=\s*\*\.(?:hepmc|lhe)', #MA5 --both-- + r'@MG5aMC\s*reconstruction_name', # MA5 hadronique '@MG5aMC', # MA5 hadronique 'run_rivet_later', # Rivet ] @@ -1237,7 +1270,7 @@ def detect_card_type(path): return 'madspin_card.dat' if 'decay' in text: # need to check if this a line like "decay w+" or "set decay" - if re.search("(^|;)\s*decay", fulltext, re.M): + if re.search(r"(^|;)\s*decay", fulltext, re.M): return 'madspin_card.dat' else: return 'reweight_card.dat' @@ -2074,6 +2107,12 @@ def check_multicore(self): # ensure that the run_card is present if not hasattr(self, 'run_card'): self.run_card = banner_mod.RunCard(pjoin(self.me_dir, 'Cards', 'run_card.dat')) + # Below reads the run_card in the LHE file rather than the Cards/run_card + import madgraph.various.lhe_parser as lhe_parser + args_path = list(args) + self.check_decay_events(args_path) + self.run_card = banner_mod.RunCard(lhe_parser.EventFile(args_path[0]).get_banner()['mgruncard']) + # we want to run this in a separate shell to avoid hard f2py crash command = [sys.executable] @@ -2085,6 +2124,12 @@ def check_multicore(self): command.append('--web') command.append('reweight') + ## TV: copy the event file as backup before starting reweighting + event_path = pjoin(self.me_dir, 'Events', self.run_name, 'events.lhe.gz') + event_path_backup = pjoin(self.me_dir, 'Events', self.run_name, 'events_orig.lhe.gz') + if os.path.exists(event_path) and not os.path.exists(event_path_backup): + shutil.copyfile(event_path, event_path_backup) + ######### START SINGLE CORE MODE ############ if self.options['nb_core']==1 or self.run_card['nevents'] < 101 or not check_multicore(self): if self.run_name: @@ -2200,7 +2245,7 @@ def check_multicore(self): cross_sections[key] = value / (nb_event+1) lhe.remove() for key in cross_sections: - if key == 'orig' or key.isdigit(): + if key == 'orig' or (key.isdigit() and not (key[0] == '2')): continue logger.info('%s : %s pb' % (key, cross_sections[key])) return @@ -2461,7 +2506,7 @@ def do_add_time_of_flight(self, line): ############################################################################ def do_print_results(self, line): - """Not in help:Print the cross-section/ number of events for a given run""" + """Print the cross-section/ number of events for a given run""" args = self.split_arg(line) options={'path':None, 'mode':'w', 'format':'full'} @@ -2942,10 +2987,15 @@ def do_rivet(self, line, postprocess=False): #2 Prepare Rivet setup environments rivet_path = self.options['rivet_path'] yoda_path = self.options['yoda_path'] + fastjet_path = subprocess.Popen([self.options['fastjet'], '--prefix'], + stdout = subprocess.PIPE).stdout.read().decode(errors='ignore').strip() + set_env = set_env + "export PATH={0}:$PATH\n".format(pjoin(rivet_path, 'bin')) set_env = set_env + "export PATH={0}:$PATH\n".format(pjoin(yoda_path, 'bin')) set_env = 
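Among the hunks above, the reweighting driver now snapshots the event file before any in-place modification: events.lhe.gz is copied to events_orig.lhe.gz only if no backup exists yet, so repeated reweight runs never overwrite the original sample. A hedged sketch of that idempotent backup; the file names follow the diff, the helper itself is illustrative:

import os
import shutil

def backup_once(event_path):
    """Copy events.lhe.gz to events_orig.lhe.gz unless a backup already exists."""
    backup = event_path.replace('events.lhe.gz', 'events_orig.lhe.gz')
    if os.path.exists(event_path) and not os.path.exists(backup):
        shutil.copyfile(event_path, backup)
    return backup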
set_env + "export LD_LIBRARY_PATH={0}:{1}:$LD_LIBRARY_PATH\n".format(pjoin(rivet_path, 'lib'), pjoin(rivet_path, 'lib64')) set_env = set_env + "export LD_LIBRARY_PATH={0}:{1}:$LD_LIBRARY_PATH\n".format(pjoin(yoda_path, 'lib'), pjoin(yoda_path, 'lib64')) + set_env = set_env + "export LD_LIBRARY_PATH={0}:$LD_LIBRARY_PATH\n".format(pjoin(self.options['hepmc_path'], 'lib')) + set_env = set_env + "export LD_LIBRARY_PATH={0}:$LD_LIBRARY_PATH\n".format(pjoin(fastjet_path, 'lib')) major, minor = sys.version_info[0:2] set_env = set_env + "export PYTHONPATH={0}:{1}:$PYTHONPATH\n".format(pjoin(rivet_path, 'lib', 'python%s.%s' %(major,minor), 'site-packages'),\ pjoin(rivet_path, 'lib64', 'python%s.%s' %(major,minor), 'site-packages')) @@ -4311,8 +4361,8 @@ def complete_compute_widths(self, text, line, begidx, endidx, formatting=True): else: completion = {} completion['options'] = self.list_completion(text, - ['--path=', '--output=', '--min_br=0.\$', '--nlo', - '--precision_channel=0.\$', '--body_decay=']) + ['--path=', '--output=', r'--min_br=0.\$', '--nlo', + r'--precision_channel=0.\$', '--body_decay=']) return self.deal_multiple_categories(completion, formatting) @@ -4821,9 +4871,9 @@ class AskforEditCard(cmd.OneLinePathCompletion): (return False to repeat the question) """ - all_card_name = ['param_card', 'run_card', 'pythia_card', 'pythia8_card', + all_card_name = ['param_card', 'run_card', 'pythia_card', 'pythia8_card', 'fo_analysis_card' 'madweight_card', 'MadLoopParams', 'shower_card', 'rivet_card'] - to_init_card = ['param', 'run', 'madweight', 'madloop', + to_init_card = ['param', 'run', 'madweight', 'madloop', 'fo_analysis', 'shower', 'pythia8','delphes','madspin', 'rivet'] special_shortcut = {} special_shortcut_help = {} @@ -4853,6 +4903,7 @@ def load_default(self): self.has_PY8 = False self.has_delphes = False self.has_rivet = False + self.has_fo_card = False self.paths = {} self.update_block = [] @@ -5058,7 +5109,8 @@ def init_run(self, cards): try: - self.run_card = banner_mod.RunCard(self.paths['run'], consistency='warning') + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + self.run_card = banner_mod.RunCard(self.paths['run'], consistency='warning') except IOError: self.run_card = {} try: @@ -5248,6 +5300,15 @@ def init_delphes(self, cards): self.has_delphes = True return [] + def init_fo_analysis(self, cards): + self.has_fo_card = False + if not self.get_path('FO_analyse', cards): + return [] + self.has_fo_card = True + self.fo_card = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse']) + self.fo_card_def = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse_default']) + return list(self.fo_card.string_vars) + def set_CM_velocity(self, line): """compute sqrts from the velocity in the center of mass frame""" @@ -5508,6 +5569,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed['delphes_card'] = '' if self.has_rivet: allowed['rivet_card'] = '' + if self.has_fo_card: + allowed['fo_card'] = '' elif len(args) == 2: if args[1] == 'run_card': @@ -5532,6 +5595,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed = {'delphes_card':'default'} elif args[1] == 'rivet_card': allowed = {'rivet_card':'default'} + elif args[1] == 'fo_card': + allowed = {'fo_card':'default'} else: allowed = {'value':''} @@ -5539,6 +5604,7 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): start = 1 if args[1] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', 
'MadLoop_card','pythia8_card','delphes_card','plot_card', + 'fo_card', 'madanalysis5_parton_card','madanalysis5_hadron_card', 'rivet_card']: start = 2 @@ -5576,6 +5642,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): categories.append('delphes_card') if self.has_rivet: categories.append('rivet_card') + if self.has_fo_card: + categories.append('fo_card') possibilities['category of parameter (optional)'] = \ self.list_completion(text, categories) @@ -5630,7 +5698,13 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): if 'delphes_card' in allowed: if allowed['delphes_card'] == 'default': opts = ['default', 'atlas', 'cms'] - possibilities['Delphes Card'] = self.list_completion(text, opts) + possibilities['Delphes Card'] = self.list_completion(text, opts) + + if 'fo_card' in allowed: + opts = self.fo_card.string_vars + if allowed['fo_card'] == 'default': + opts.append('default') + possibilities['FO Card'] = self.list_completion(text, opts) if 'value' in list(allowed.keys()): opts = ['default', 'scale'] @@ -5733,21 +5807,28 @@ def do_set(self, line): if args[0] in self.special_shortcut: targettypes , cmd = self.special_shortcut[args[0]] if len(args) != len(targettypes) +1: - logger.warning('shortcut %s requires %s argument' % (args[0], len(targettypes))) - if len(args) < len(targettypes) +1: - return + if len(targettypes) == 1 and args[len(targettypes)].startswith('scan'): + args = args[:len(targettypes)] + [' '.join(args[len(targettypes):])] + targettypes = [str] else: - logger.warning('additional argument will be ignored') + logger.warning('shortcut %s requires %s argument' % (args[0], len(targettypes))) + if len(args) < len(targettypes) +1: + return + else: + logger.warning('additional argument will be ignored') values ={} for i, argtype in enumerate(targettypes): try: - values = {str(i): banner_mod.ConfigFile.format_variable(args[i+1], argtype, args[0])} + values[str(i)] = banner_mod.ConfigFile.format_variable(args[i+1], argtype, args[0]) except ValueError as e: logger.warning("Wrong argument: The entry #%s should be of type %s.", i+1, argtype) return except InvalidCmd as e: - logger.warning(str(e)) - return + if isinstance(args[i+1], str) and args[i+1].startswith('scan'): + values[str(i)] = args[i+1] + else: + logger.warning(str(e)) + return #else: # logger.warning("too many argument for this command") # return @@ -5787,7 +5868,7 @@ def do_set(self, line): if os.path.exists(pythia_path): logger.info('add line QCUT = %s in pythia_card.dat' % args[1]) p_card = open(pythia_path,'r').read() - p_card, n = re.subn('''^\s*QCUT\s*=\s*[\de\+\-\.]*\s*$''', + p_card, n = re.subn(r'''^\s*QCUT\s*=\s*[\de\+\-\.]*\s*$''', ''' QCUT = %s ''' % args[1], \ p_card, flags=(re.M+re.I)) if n==0: @@ -5801,7 +5882,7 @@ def do_set(self, line): if os.path.exists(pythia_path): logger.info('add line SHOWERKT = %s in pythia_card.dat' % args[1].upper()) p_card = open(pythia_path,'r').read() - p_card, n = re.subn('''^\s*SHOWERKT\s*=\s*[default\de\+\-\.]*\s*$''', + p_card, n = re.subn(r'''^\s*SHOWERKT\s*=\s*[default\de\+\-\.]*\s*$''', ''' SHOWERKT = %s ''' % args[1].upper(), \ p_card, flags=(re.M+re.I)) if n==0: @@ -5856,7 +5937,7 @@ def do_set(self, line): pjoin(self.me_dir,'Cards', 'delphes_card.dat')) return - if args[0] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', + if args[0] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', 'fo_card', 'delphes_card','madanalysis5_hadron_card','madanalysis5_parton_card','rivet_card']: if args[1] == 'default': 
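The do_set hunk below makes the special shortcuts scan-aware: when a one-argument shortcut receives a value starting with 'scan', the remaining tokens are re-joined into one string and kept verbatim instead of being coerced to the declared type. A standalone sketch of that fallback; the to_type coercion merely stands in for ConfigFile.format_variable:

def parse_shortcut_value(args, to_type=int):
    """Coerce a shortcut argument, but let 'scan:...' values through as strings."""
    raw = ' '.join(args)        # re-join tokens such as 'scan:[170, 175]'
    if raw.startswith('scan'):
        return raw              # keep the scan specification verbatim
    return to_type(raw)         # normal path: enforce the declared type

assert parse_shortcut_value(['173']) == 173
assert parse_shortcut_value(['scan:[170,', '175]']) == 'scan:[170, 175]'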
@@ -6176,6 +6257,22 @@ def do_set(self, line): self.setRivet(args[start], value, default=default) self.rivet_card.write(self.paths['rivet'], self.paths['rivet_default']) + elif self.has_fo_card and (card in ['', 'fo_card'])\ + and args[start].lower() in [k.lower() for k in self.fo_card.string_vars]: + + if args[start] in self.conflict and card == '': + text = 'ambiguous name (present in more than one card). Please specify which card to edit' + logger.warning(text) + return + if args[start+1] == 'default': + value = self.fo_card_default[args[start]] + default = True + else: + value = args[start+1] + default = False + self.fo_card[args[start]] = value + self.modified_card.add('fo_card') + #INVALID -------------------------------------------------------------- else: logger.warning('invalid set command %s ' % line) @@ -6222,12 +6319,13 @@ def setM(self, block, name, value): def setR(self, name, value): - if self.mother_interface.inputfile: - self.run_card.set(name, value, user=True, raiseerror=True) - else: - self.run_card.set(name, value, user=True) - new_value = self.run_card.get(name) - logger.info('modify parameter %s of the run_card.dat to %s' % (name, new_value),'$MG:BOLD') + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + if self.mother_interface.inputfile: + self.run_card.set(name, value, user=True, raiseerror=True) + else: + self.run_card.set(name, value, user=True) + new_value = self.run_card.get(name) + logger.info('modify parameter %s of the run_card.dat to %s' % (name, new_value),'$MG:BOLD') def setML(self, name, value, default=False): @@ -6314,6 +6412,7 @@ def check_card_consistency(self): proc_charac = self.mother_interface.proc_characteristics if proc_charac['grouped_matrix'] and \ + isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int) and \ abs(self.run_card['lpp1']) == 1 == abs(self.run_card['lpp2']) and \ (self.run_card['nb_proton1'] != self.run_card['nb_proton2'] or self.run_card['nb_neutron1'] != self.run_card['nb_neutron2'] or @@ -6403,41 +6502,42 @@ def check_card_consistency(self): # check that only quark/gluon/photon are in initial beam if lpp=+-1 pdg_in_p = list(range(-6,7))+[21,22] - if (abs(self.run_card['lpp1'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial1'])) \ + if isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int): + if(abs(self.run_card['lpp1'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial1'])) \ or (abs(self.run_card['lpp2'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial2'])): - if 'pythia_card.dat' in self.cards or 'pythia8_card.dat' in self.cards: - path_to_remove = None - if 'pythia_card.dat' in self.cards: - path_to_remove = self.paths['pythia'] - card_to_remove = 'pythia_card.dat' - elif 'pythia8_card.dat' in self.cards: - path_to_remove = self.paths['pythia8'] - card_to_remove = 'pythia8_card.dat' - if path_to_remove: - if 'partonshower' in self.run_card['bypass_check']: + if 'pythia_card.dat' in self.cards or 'pythia8_card.dat' in self.cards: + path_to_remove = None + if 'pythia_card.dat' in self.cards: + path_to_remove = self.paths['pythia'] + card_to_remove = 'pythia_card.dat' + elif 'pythia8_card.dat' in self.cards: + path_to_remove = self.paths['pythia8'] + card_to_remove = 'pythia8_card.dat' + if path_to_remove: + if 'partonshower' in self.run_card['bypass_check']: + logger.warning("forcing to keep parton-shower run while possibly not fully consistent... 
please be carefull") + else: + logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.') + os.remove(path_to_remove) + self.cards.remove(card_to_remove) + else: + logger.info('Remember that Parton-Shower are not yet ready for such proton component definition (HW implementation in progress).', '$MG:BOLD' ) + elif (abs(self.run_card['lpp1'])==3 and abs(self.run_card['lpp2'])==3): + if 'pythia8_card.dat' in self.cards: + if self.run_card['pdlabel'] == 'isronlyll': + if 'partonshower' not in self.run_card['bypass_check']: + # force that QED shower is on? + for param in ['TimeShower:QEDshowerByQ', 'TimeShower:QEDshowerByL', 'TimeShower:QEDshowerByGamma', 'SpaceShower:QEDshowerByQ', 'SpaceShower:QEDshowerByL']: + if param not in self.PY8Card or \ + (not self.PY8Card[param] and param.lower() not in self.PY8Card.user_set): + logger.warning('Activating QED shower: setting %s to True', param) + self.PY8Card[param] = True + elif 'partonshower' in self.run_card['bypass_check']: logger.warning("forcing to keep parton-shower run while possibly not fully consistent... please be carefull") - else: + else: logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.') - os.remove(path_to_remove) - self.cards.remove(card_to_remove) - else: - logger.info('Remember that Parton-Shower are not yet ready for such proton component definition (HW implementation in progress).', '$MG:BOLD' ) - elif (abs(self.run_card['lpp1'])==3 and abs(self.run_card['lpp2'])==3): - if 'pythia8_card.dat' in self.cards: - if self.run_card['pdlabel'] == 'isronlyll': - if 'partonshower' not in self.run_card['bypass_check']: - # force that QED shower is on? - for param in ['TimeShower:QEDshowerByQ', 'TimeShower:QEDshowerByL', 'TimeShower:QEDshowerByGamma', 'SpaceShower:QEDshowerByQ', 'SpaceShower:QEDshowerByL']: - if param not in self.PY8Card or \ - (not self.PY8Card[param] and param.lower() not in self.PY8Card.user_set): - logger.warning('Activating QED shower: setting %s to True', param) - self.PY8Card[param] = True - elif 'partonshower' in self.run_card['bypass_check']: - logger.warning("forcing to keep parton-shower run while possibly not fully consistent... please be carefull") - else: - logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.') - os.remove(self.paths['pythia8']) - self.cards.remove('pythia8_card.dat') + os.remove(self.paths['pythia8']) + self.cards.remove('pythia8_card.dat') ######################################################################## @@ -6514,7 +6614,8 @@ def check_card_consistency(self): #check relation between lepton PDF // dressed lepton collisions // ... 
- if abs(self.run_card['lpp1']) != 1 or abs(self.run_card['lpp2']) != 1: + if isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int) and \ + abs(self.run_card['lpp1']) != 1 or abs(self.run_card['lpp2']) != 1: if abs(self.run_card['lpp1']) == abs(self.run_card['lpp2']) == 3: # this can be dressed lepton or photon-flux if proc_charac['pdg_initial1'] in [[11],[-11]] and proc_charac['pdg_initial2'] in [[11],[-11]]: @@ -6732,7 +6833,11 @@ def write_card_param(self): """ write the param_card """ self.param_card.write(self.paths['param']) - + + def write_card_fo_card(self): + """ write the fo_card""" + self.fo_card.write_card_from_template(self.paths['FO_analyse'], self.paths['FO_analyse_default']) + @staticmethod def update_dependent(mecmd, me_dir, param_card, path ,timer=0, run_card=None, lhapdfconfig=None): @@ -7076,7 +7181,7 @@ def do_decay(self, line): #first find the particle particle = line.split('>')[0].strip() logger.info("change madspin_card to define the decay of %s: %s" %(particle, line.strip()), '$MG:BOLD') - particle = particle.replace('+','\+').replace('-','\-') + particle = particle.replace('+',r'\+').replace('-',r'\-') decay_pattern = re.compile(r"^\s*decay\s+%s\s*>[\s\w+-~]*?$" % particle, re.I+re.M) text= open(path).read() text = decay_pattern.sub('', text) @@ -7193,7 +7298,7 @@ def help_edit(self, prefix=True): logger.info( ' --clean remove all previously existing line in the file') logger.info( ' --comment_line="" comment all lines matching the regular expression') logger.info('') - logger.info(' Note: all regular-expression will be prefixed by ^\s*') + logger.info(r' Note: all regular-expression will be prefixed by ^\s*') logger.info('') logger.info( ' example: edit reweight --after_line="change mode\b" change model heft') logger.info( ' edit madspin --after_line="banner" change model XXXX') @@ -7314,7 +7419,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern=r'''replace_line=(?P["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[14:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[14:-1] for posline,l in enumerate(split): if re.search(pattern, l): break @@ -7344,7 +7449,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern=r'''comment_line=(?P["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[14:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[14:-1] nb_mod = 0 for posline,l in enumerate(split): if re.search(pattern, l): @@ -7366,7 +7471,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern=r'''before_line=(?P["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[13:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[13:-1] for posline,l in enumerate(split): if re.search(pattern, l): break @@ -7383,7 +7488,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern = r'''after_line=(?P["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[12:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[12:-1] for posline,l in enumerate(split): if re.search(pattern, l): break @@ -7527,16 +7632,19 @@ def open_file(self, answer): answer = 'plot' else: answer = self.cards[int(answer)-self.integer_bias] - + path = '' if 'madweight' in answer: answer = answer.replace('madweight', 'MadWeight') elif 
'MadLoopParams' in answer: answer = self.paths['ML'] elif 'pythia8_card' in answer: answer = self.paths['pythia8'] + elif 'FO_analyse' in answer: + path = self.paths['FO_analyse'] + answer = 'fo_card' if os.path.exists(answer): path = answer - else: + elif not os.path.exists(path): if not '.dat' in answer and not '.lhco' in answer: if answer != 'trigger': path = self.paths[answer] @@ -7595,7 +7703,8 @@ def reload_card(self, path): logger.error('Please re-open the file and fix the problem.') logger.warning('using the \'set\' command without opening the file will discard all your manual change') elif path == self.paths['run']: - self.run_card = banner_mod.RunCard(path) + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + self.run_card = banner_mod.RunCard(path) elif path == self.paths['shower']: self.shower_card = shower_card_mod.ShowerCard(path) elif path == self.paths['ML']: @@ -7614,6 +7723,8 @@ def reload_card(self, path): except: import internal.madweight.Cards as mwcards self.mw_card = mwcards.Card(path) + elif path == self.paths['FO_analyse']: + self.fo_card = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse']) else: logger.debug('not keep in sync: %s', path) return path @@ -7629,6 +7740,9 @@ def scanparamcardhandling(input_path=lambda obj: pjoin(obj.me_dir, 'Cards', 'par iteratorclass=param_card_mod.ParamCardIterator, summaryorder=lambda obj: lambda:None, check_card=lambda obj: CommonRunCmd.static_check_param_card, + run_card_scan=False, + run_card_input= lambda obj: pjoin(obj.me_dir, 'Cards', 'run_card.dat'), + run_card_iteratorclass=banner_mod.RunCardIterator, ): """ This is a decorator for customizing/using scan over the param_card (or technically other) This should be use like this: @@ -7678,7 +7792,60 @@ def __enter__(self): def __exit__(self, ctype, value, traceback ): self.iterator.write(self.path) - def decorator(original_fct): + def scan_over_run_card(original_fct, obj, *args, **opts): + + if isinstance(input_path, str): + card_path = run_card_input + else: + card_path = run_card_input(obj) + + run_card_iterator = run_card_iteratorclass(card_path) + orig_card = copy.deepcopy(run_card_iterator.run_card) + if not run_card_iterator.run_card.scan_set: + return original_fct(obj, *args, **opts) + + + with restore_iterator(orig_card, card_path): + # this with statement ensure that the original card is restore + # whatever happens inside those block + + if not hasattr(obj, 'allow_notification_center'): + obj.allow_notification_center = False + with misc.TMP_variable(obj, 'allow_notification_center', False): + orig_name = get_run_name(obj) + if not orig_name and args[1]: + orig_name = args[1][0] + args = (args[0], args[1][1:]) + #orig_name = "scan_%s" % len(obj.results) + + try: + os.mkdir(pjoin(obj.me_dir, 'Events', orig_name)) + except Exception: + pass + next_name = orig_name + "_00" + + for i,card in enumerate(run_card_iterator): + card.write(card_path) + # still have to check for the auto-wdith + #if i !=0: + next_name = run_card_iterator.get_next_name(next_name) + set_run_name(obj)(next_name) + try: + original_fct(obj, *args, **opts) + except ignoreerror as error: + run_card_iterator.store_entry(next_name, {'exception': error}) + else: + run_card_iterator.store_entry(next_name, store_for_scan(obj)(), run_card_path=card_path) + + #param_card_iterator.write(card_path) #-> this is done by the with statement + name = misc.get_scan_name(orig_name, next_name) + path = result_path(obj) % name + logger.info("write scan results in %s" % path ,'$MG:BOLD') + order = 
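The run-card scan machinery above relies on the restore_iterator context manager: a deep copy of the card is taken before the scan and written back on exit, so the user's run_card.dat survives any failure inside a scan point. A generic sketch of that guarantee; restore_on_exit and its write argument are hypothetical names, not the decorator's API:

import copy
from contextlib import contextmanager

@contextmanager
def restore_on_exit(card, write):
    """Write back a pristine copy of card when the block exits, even on error."""
    original = copy.deepcopy(card)
    try:
        yield card
    finally:
        write(original)            # restore whatever happened inside the block

store = {}
with restore_on_exit({'nevents': 1000}, write=store.update) as card:
    card['nevents'] = 5000         # a scan point mutates the working copy
assert store['nevents'] == 1000    # the original values were written back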
summaryorder(obj)() + run_card_iterator.write_summary(path, order=order) + + + def decorator(original_fct): def new_fct(obj, *args, **opts): if isinstance(input_path, str): @@ -7702,8 +7869,13 @@ def new_fct(obj, *args, **opts): if not param_card_iterator: #first run of the function - original_fct(obj, *args, **opts) - return + if run_card_scan: + scan_over_run_card(original_fct, obj, *args, **opts) + return + else: + #first run of the function + original_fct(obj, *args, **opts) + return with restore_iterator(param_card_iterator, card_path): # this with statement ensure that the original card is restore diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/extended_cmd.py b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/extended_cmd.py index 2f37070580..789976beee 100755 --- a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/extended_cmd.py +++ b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/extended_cmd.py @@ -624,12 +624,12 @@ def complete(self, text, state): compfunc = self.completenames # correct wrong splittion with '\ ' - if line and begidx > 2 and line[begidx-2:begidx] == '\ ': + if line and begidx > 2 and line[begidx-2:begidx] == r'\ ': Ntext = line.split(os.path.sep)[-1] - self.completion_prefix = Ntext.rsplit('\ ', 1)[0] + '\ ' + self.completion_prefix = Ntext.rsplit(r'\ ', 1)[0] + r'\ ' to_rm = len(self.completion_prefix) - 1 Nbegidx = len(line.rsplit(os.path.sep, 1)[0]) + 1 - data = compfunc(Ntext.replace('\ ', ' '), line, Nbegidx, endidx) + data = compfunc(Ntext.replace(r'\ ', ' '), line, Nbegidx, endidx) self.completion_matches = [p[to_rm:] for p in data if len(p)>to_rm] # correct wrong splitting with '-'/"=" @@ -742,7 +742,7 @@ def path_completion(text, base_dir = None, only_dirs = False, completion += [prefix + f for f in ['.'+os.path.sep, '..'+os.path.sep] if \ f.startswith(text) and not prefix.startswith('.')] - completion = [a.replace(' ','\ ') for a in completion] + completion = [a.replace(' ',r'\ ') for a in completion] return completion @@ -1253,7 +1253,7 @@ def check_answer_in_input_file(self, question_instance, default, path=False, lin return possibility[0] if '=' in line and ' ' in line.strip(): leninit = len(line) - line,n = re.subn('\s*=\s*','=', line) + line,n = re.subn(r'\s*=\s*','=', line) if n and len(line) != leninit: return self.check_answer_in_input_file(question_instance, default, path=path, line=line) @@ -1311,7 +1311,7 @@ def nice_error_handling(self, error, line): if os.path.exists(self.debug_output): os.remove(self.debug_output) try: - super(Cmd,self).onecmd('history %s' % self.debug_output.replace(' ', '\ ')) + super(Cmd,self).onecmd('history %s' % self.debug_output.replace(' ', r'\ ')) except Exception as error: logger.error(error) @@ -2001,6 +2001,7 @@ def write_configuration(self, filepath, basefile, basedir, to_keep): text = "" has_mg5_path = False # Use local configuration => Need to update the path + already_written = set() for line in open(basefile): if '=' in line: data, value = line.split('=',1) @@ -2018,9 +2019,12 @@ def write_configuration(self, filepath, basefile, basedir, to_keep): comment = '' if key in to_keep: value = str(to_keep[key]) - else: + elif line not in already_written: + already_written.add(line) text += line continue + else: + continue if key == 'mg5_path': has_mg5_path = True try: @@ -2032,14 +2036,20 @@ def write_configuration(self, filepath, basefile, basedir, to_keep): # check if absolute path if not os.path.isabs(value): value = os.path.realpath(os.path.join(basedir, value)) - text += '%s = %s # %s \n' % (key, value, 
comment) + new_line = '%s = %s # %s \n' % (key, value, comment) + if new_line not in already_written: + text += new_line + already_written.add(new_line) for key in to_write: if key in to_keep: - text += '%s = %s \n' % (key, to_keep[key]) + new_line = '%s = %s \n' % (key, to_keep[key]) + if new_line not in already_written: + text += new_line if not MADEVENT and not has_mg5_path: - text += """\n# MG5 MAIN DIRECTORY\n""" - text += "mg5_path = %s\n" % MG5DIR + if "mg5_path = %s\n" % MG5DIR not in already_written: + text += """\n# MG5 MAIN DIRECTORY\n""" + text += "mg5_path = %s\n" % MG5DIR writer = open(filepath,'w') writer.write(text) @@ -2190,7 +2200,7 @@ def onecmd(self, line, **opt): raise def reask(self, reprint_opt=True): - pat = re.compile('\[(\d*)s to answer\]') + pat = re.compile(r'\[(\d*)s to answer\]') prev_timer = signal.alarm(0) # avoid timer if any if prev_timer: @@ -2991,7 +3001,7 @@ def question_formatting(self, nb_col = 80, lpotential_switch=0, lnb_key=0, key=None): - """should return four lines: + r"""should return four lines: 1. The upper band (typically /========\ 2. The lower band (typically \========/ 3. The line without conflict | %(nb)2d. %(descrip)-20s %(name)5s = %(switch)-10s | @@ -3239,13 +3249,13 @@ def create_question(self, help_text=True): data_to_format['conflict_switch'] = self.color_for_value(key,self.inconsistent_keys[key], consistency=False) if hidden_line: - f2 = re.sub('%(\((?:name|descrip|add_info)\)-?)(\d+)s', + f2 = re.sub(r'%(\((?:name|descrip|add_info)\)-?)(\d+)s', lambda x: '%%%s%ds' % (x.group(1),int(x.group(2))+9), f2) text.append(f2 % data_to_format) elif hidden_line: if not f3: - f3 = re.sub('%(\((?:name|descrip|add_info)\)-?)(\d+)s', + f3 = re.sub(r'%(\((?:name|descrip|add_info)\)-?)(\d+)s', lambda x: '%%%s%ds' % (x.group(1),int(x.group(2))+9), f1) text.append(f3 % data_to_format) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/file_writers.py b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/file_writers.py index 41bff05276..526756129f 100755 --- a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/file_writers.py +++ b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/file_writers.py @@ -36,10 +36,10 @@ class FileWriter(io.FileIO): supported_preprocessor_commands = ['if'] preprocessor_command_re=re.compile( - "\s*(?P%s)\s*\(\s*(?P.*)\s*\)\s*{\s*"\ + r"\s*(?P%s)\s*\(\s*(?P.*)\s*\)\s*{\s*"\ %('|'.join(supported_preprocessor_commands))) preprocessor_endif_re=re.compile(\ - "\s*}\s*(?Pelse)?\s*(\((?P.*)\))?\s*(?P{)?\s*") + r"\s*}\s*(?Pelse)?\s*(\((?P.*)\))?\s*(?P{)?\s*") class FileWriterError(IOError): """Exception raised if an error occurs in the definition @@ -191,15 +191,15 @@ class FortranWriterError(FileWriter.FileWriterError): pass # Parameters defining the output of the Fortran writer - keyword_pairs = {'^if.+then\s*$': ('^endif', 2), - '^type(?!\s*\()\s*.+\s*$': ('^endtype', 2), - '^do(?!\s+\d+)\s+': ('^enddo\s*$', 2), - '^subroutine': ('^end\s*$', 0), - '^module': ('^end\s*$', 0), - 'function': ('^end\s*$', 0)} - single_indents = {'^else\s*$':-2, - '^else\s*if.+then\s*$':-2} - number_re = re.compile('^(?P\d+)\s+(?P.*)') + keyword_pairs = {r'^if.+then\s*$': ('^endif', 2), + r'^type(?!\s*\()\s*.+\s*$': ('^endtype', 2), + r'^do(?!\s+\d+)\s+': (r'^enddo\s*$', 2), + '^subroutine': (r'^end\s*$', 0), + '^module': (r'^end\s*$', 0), + 'function': (r'^end\s*$', 0)} + single_indents = {r'^else\s*$':-2, + r'^else\s*if.+then\s*$':-2} + number_re = re.compile(r'^(?P\d+)\s+(?P.*)') line_cont_char = '$' comment_char = 'c' uniformcase = True #force everyting 
to be lower/upper case @@ -212,7 +212,7 @@ class FortranWriterError(FileWriter.FileWriterError): # Private variables __indent = 0 __keyword_list = [] - __comment_pattern = re.compile(r"^(\s*#|c$|(c\s+([^=]|$))|cf2py|c\-\-|c\*\*|!)", re.IGNORECASE) + __comment_pattern = re.compile(r"^(\s*#|c\$|c$|(c\s+([^=]|$))|cf2py|c\-\-|c\*\*|\s*!|!\$)", re.IGNORECASE) __continuation_line = re.compile(r"(?: )[$&]") def write_line(self, line): @@ -424,26 +424,20 @@ def count_number_of_quotes(self, line): i = i + 1 return len(splitline)-1 - def remove_routine(self, text, fct_names, formatting=True): - """write the incoming text but fully removing the associate routine/function - text can be a path to a file, an iterator, a string - fct_names should be a list of functions to remove + @staticmethod + def get_routine(text, fct_names, call_back=None): + """ + get the fortran function from a fortran file """ - f77_type = ['real*8', 'integer', 'double precision', 'logical'] - pattern = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ + pattern = re.compile(r'^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ % {'type':'|'.join(f77_type)}, re.I) - + + if isinstance(text, str): + text = text.split('\n') + + to_write=False removed = [] - if isinstance(text, str): - if '\n' in text: - text = text.split('\n') - else: - text = open(text) - if isinstance(fct_names, str): - fct_names = [fct_names] - - to_write=True for line in text: fct = pattern.findall(line) if fct: @@ -451,22 +445,38 @@ def remove_routine(self, text, fct_names, formatting=True): to_write = False else: to_write = True - if to_write: - if formatting: - if line.endswith('\n'): - line = line[:-1] - self.writelines(line) - else: - if not line.endswith('\n'): - line = '%s\n' % line - super(FileWriter,self).writelines(line) + if call_back: + call_back(line) else: removed.append(line) - + return removed + + def remove_routine(self, text, fct_names, formatting=True): + """write the incoming text but fully removing the associate routine/function + text can be a path to a file, an iterator, a string + fct_names should be a list of functions to remove + """ + + def call_back(line): + if formatting: + if line.endswith('\n'): + line = line[:-1] + self.writelines(line) + else: + if not line.endswith('\n'): + line = '%s\n' % line + super(FileWriter,self).writelines(line) + + return self.get_routine(text, fct_names, call_back) + +class FortranWriter90(FortranWriter): + + comment_char = ' !' 
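The file_writers.py refactor above splits routine removal in two: a static get_routine walks the Fortran source, flips a keep/drop flag at every SUBROUTINE or typed FUNCTION header, forwards kept lines to an optional callback and returns the dropped ones, while remove_routine shrinks to a thin wrapper whose callback does the writing. A compact sketch of that callback design; note this sketch escapes the '*' in real*8, which the pattern quoted above leaves unescaped:

import re

F77_TYPES = ['real*8', 'integer', 'double precision', 'logical']
ROUTINE_RE = re.compile(r'^\s+(?:SUBROUTINE|(?:%s)\s+function)\s+([a-zA-Z]\w*)'
                        % '|'.join(re.escape(t) for t in F77_TYPES), re.I)

def get_routine(text, fct_names, call_back=None):
    """Kept lines go to call_back; lines of the named routines are returned."""
    if isinstance(text, str):
        text = text.split('\n')
    to_write, removed = False, []
    for line in text:
        fct = ROUTINE_RE.findall(line)
        if fct:
            to_write = fct[0].lower() not in fct_names
        if to_write:
            if call_back:
                call_back(line)
        else:
            removed.append(line)
    return removed

src = "      SUBROUTINE KEEPME()\n      END\n      SUBROUTINE DROPME()\n      END"
kept = []
dropped = get_routine(src, ['dropme'], kept.append)
assert any('KEEPME' in l for l in kept) and any('DROPME' in l for l in dropped)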
+ #=============================================================================== # CPPWriter #=============================================================================== @@ -497,50 +507,50 @@ class CPPWriterError(FileWriter.FileWriterError): '^private': standard_indent, '^protected': standard_indent} - spacing_patterns = [ - ('\s*\"\s*}', '\"'), - ('\s*,\s*', ', '), - ('\s*-\s*', ' - '), - ('([{(,=])\s*-\s*', '\g<1> -'), - ('(return)\s*-\s*', '\g<1> -'), - ('\s*\+\s*', ' + '), - ('([{(,=])\s*\+\s*', '\g<1> +'), - ('\(\s*', '('), - ('\s*\)', ')'), - ('\{\s*', '{'), - ('\s*\}', '}'), - ('\s*=\s*', ' = '), - ('\s*>\s*', ' > '), - ('\s*<\s*', ' < '), - ('\s*!\s*', ' !'), - ('\s*/\s*', '/'), - ('(?\s+>\s*', ' >> '), - ('<\s*double\s*>>\s*', ' > '), - ('\s*<\s+<\s*', ' << '), - ('\s*-\s+>\s*', '->'), - ('\s*=\s+=\s*', ' == '), - ('\s*!\s+=\s*', ' != '), - ('\s*>\s+=\s*', ' >= '), - ('\s*<\s+=\s*', ' <= '), - ('\s*&&\s*', ' && '), - ('\s*\|\|\s*', ' || '), - ('\s*{\s*}', ' {}'), - ('\s*;\s*', '; '), - (';\s*\}', ';}'), - (';\s*$}', ';'), - ('\s*<\s*([a-zA-Z0-9]+?)\s*>', '<\g<1>>'), - ('^#include\s*<\s*(.*?)\s*>', '#include <\g<1>>'), - ('(\d+\.{0,1}\d*|\.\d+)\s*[eE]\s*([+-]{0,1})\s*(\d+)', - '\g<1>e\g<2>\g<3>'), - ('\s+',' '), - ('^\s*#','#')] + spacing_patterns = [(r'\s*\"\s*}', '\"'), + (r'\s*,\s*', ', '), + (r'\s*-\s*', ' - '), + (r'([{(,=])\s*-\s*', r'\g<1> -'), + (r'(return)\s*-\s*', r'\g<1> -'), + (r'\s*\+\s*', ' + '), + (r'([{(,=])\s*\+\s*', r'\g<1> +'), + (r'\(\s*', '('), + (r'\s*\)', ')'), + (r'\{\s*', '{'), + (r'\s*\}', '}'), + (r'\s*=\s*', ' = '), + (r'\s*>\s*', ' > '), + (r'\s*<\s*', ' < '), + (r'\s*!\s*', ' !'), + (r'\s*/\s*', '/'), + (r'\s*\*\s*', ' * '), + (r'\s*-\s+-\s*', '-- '), + (r'\s*\+\s+\+\s*', '++ '), + (r'\s*-\s+=\s*', ' -= '), + (r'\s*\+\s+=\s*', ' += '), + (r'\s*\*\s+=\s*', ' *= '), + (r'\s*/=\s*', ' /= '), + (r'\s*>\s+>\s*', ' >> '), + (r'<\s*double\s*>>\s*', ' > '), + (r'\s*<\s+<\s*', ' << '), + (r'\s*-\s+>\s*', '->'), + (r'\s*=\s+=\s*', ' == '), + (r'\s*!\s+=\s*', ' != '), + (r'\s*>\s+=\s*', ' >= '), + (r'\s*<\s+=\s*', ' <= '), + (r'\s*&&\s*', ' && '), + (r'\s*\|\|\s*', ' || '), + (r'\s*{\s*}', ' {}'), + (r'\s*;\s*', '; '), + (r';\s*\}', ';}'), + (r';\s*$}', ';'), + (r'\s*<\s*([a-zA-Z0-9]+?)\s*>', r'<\g<1>>'), + (r'^#include\s*<\s*(.*?)\s*>', r'#include <\g<1>>'), + (r'(\d+\.{0,1}\d*|\.\d+)\s*[eE]\s*([+-]{0,1})\s*(\d+)', + r'\g<1>e\g<2>\g<3>'), + (r'\s+',' '), + (r'^\s*#','#')] + spacing_re = dict([(key[0], re.compile(key[0])) for key in \ spacing_patterns]) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/gen_crossxhtml.py b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/gen_crossxhtml.py index d58ec573bc..681bf9d09b 100755 --- a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/gen_crossxhtml.py +++ b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/gen_crossxhtml.py @@ -648,7 +648,7 @@ def recreate(self, banner): if run_card['ickkw'] != 0: #parse the file to have back the information pythia_log = misc.BackRead(pjoin(path, '%s_pythia.log' % tag)) - pythiare = re.compile("\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") + pythiare = re.compile(r"\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") for line in pythia_log: info = pythiare.search(line) if not info: @@ -1623,7 +1623,7 @@ def get_html(self, runresults): elif self.debug: text = str(self.debug).replace('. ','.
<br>') if 'http' in text: - pat = re.compile('(http[\S]*)') + pat = re.compile(r'(http[\S]*)') text = pat.sub(r'<a href=\1> here </a>', text) debug = '<br><br><font color=red>%s<BR>%s</font>
' % \ (self.debug.__class__.__name__, text) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/gen_ximprove.py b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/gen_ximprove.py index c86da36a05..415ecc9de0 100755 --- a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/gen_ximprove.py +++ b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/gen_ximprove.py @@ -158,8 +158,11 @@ def get_helicity(self, to_submit=True, clean=True): (stdout, _) = p.communicate(''.encode()) stdout = stdout.decode('ascii',errors='ignore') if stdout: - nb_channel = max([math.floor(float(d)) for d in stdout.split()]) + lines = stdout.strip().split('\n') + nb_channel = max([math.floor(float(d)) for d in lines[-1].split()]) else: + if os.path.exists(pjoin(self.me_dir, 'error')): + os.remove(pjoin(self.me_dir, 'error')) for matrix_file in misc.glob('matrix*orig.f', Pdir): files.cp(matrix_file, matrix_file.replace('orig','optim')) P_zero_result.append(Pdir) @@ -297,12 +300,14 @@ def get_helicity(self, to_submit=True, clean=True): bad_amps_perhel = [] if __debug__: mtext = open(matrix_file).read() - nb_amp = int(re.findall('PARAMETER \(NGRAPHS=(\d+)\)', mtext)[0]) - logger.debug('nb_hel: %s zero amp: %s bad_amps_hel: %s/%s', len(good_hels),len(bad_amps),len(bad_amps_perhel), len(good_hels)*nb_amp ) + nb_amp = int(re.findall(r'PARAMETER \(NGRAPHS=(\d+)\)', mtext)[0]) + logger.debug('(%s) nb_hel: %s zero amp: %s bad_amps_hel: %s/%s', split_file[-1], len(good_hels),len(bad_amps),len(bad_amps_perhel), len(good_hels)*nb_amp ) if len(good_hels) == 1: files.cp(matrix_file, matrix_file.replace('orig','optim')) continue # avoid optimization if onlye one helicity - recycler = hel_recycle.HelicityRecycler(good_hels, bad_amps, bad_amps_perhel) + + gauge = self.cmd.proc_characteristics['gauge'] + recycler = hel_recycle.HelicityRecycler(good_hels, bad_amps, bad_amps_perhel, gauge=gauge) # In case of bugs you can play around with these: recycler.hel_filt = self.run_card['hel_filtering'] recycler.amp_splt = self.run_card['hel_splitamp'] @@ -1299,7 +1304,7 @@ def get_job_for_event(self): 'script_name': 'unknown', 'directory': C.name, # need to be change for splitted job 'P_dir': C.parent_name, - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # needed for RO gridpack 'offset': 1, # need to be change for splitted job 'nevents': nevents, 'maxiter': self.max_iter, @@ -1387,7 +1392,7 @@ def create_ajob(self, template, jobs, write_dir=None): break info = jobs[j] info['script_name'] = 'ajob%i' % script_number - info['keeplog'] = 'false' + info['keeplog'] = 'false' if self.run_card['keep_log'] != 'debug' else 'true' if "base_directory" not in info: info["base_directory"] = "./" fsock.write(template_text % info) @@ -1456,7 +1461,7 @@ def get_job_for_precision(self): 'script_name': 'unknown', 'directory': C.name, # need to be change for splitted job 'P_dir': C.parent_name, - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # used for RO gridpack 'offset': 1, # need to be change for splitted job 'nevents': nevents, 'maxiter': self.max_iter, @@ -1916,7 +1921,7 @@ def get_job_for_event(self): 'directory': C.name, # need to be change for splitted job 'P_dir': os.path.basename(C.parent_name), 'offset': 1, # need to be change for splitted job - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # use for RO gridpack 
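Two robustness tweaks sit in the gen_ximprove.py hunks above: the helicity scan now parses only the last line of the helper's standard output, so banner or warning text printed earlier no longer breaks the float conversion, and ajob scripts keep their logs when the run card asks for keep_log=debug. A sketch of the last-line parsing; the sample output is invented:

import math

def parse_nb_channel(stdout):
    """Read the channel multipliers from the final line of the output only."""
    lines = stdout.strip().split('\n')
    return max(math.floor(float(d)) for d in lines[-1].split())

stdout = "some banner text that would break a naive split()\n 1.0 2.0 4.0\n"
assert parse_nb_channel(stdout) == 4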
'nevents': nevents, #int(nevents*self.gen_events_security)+1, 'maxiter': self.max_iter, 'miniter': self.min_iter, diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/hel_recycle.py b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/hel_recycle.py index dfa45d5d20..6c86611f68 100755 --- a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/hel_recycle.py +++ b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/hel_recycle.py @@ -383,7 +383,7 @@ def get_number(cls, *args): class HelicityRecycler(): '''Class for recycling helicity''' - def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[]): + def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[], gauge='U'): External.good_hel = [] External.nhel_lines = '' @@ -427,6 +427,7 @@ def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[]): self.all_hel = [] self.hel_filt = True + self.gauge = gauge def set_input(self, file): if 'born_matrix' in file: @@ -612,7 +613,7 @@ def unfold_helicities(self, line, nature): def apply_amps(self, line, new_objs): if self.amp_splt: - return split_amps(line, new_objs) + return split_amps(line, new_objs, gauge=self.gauge) else: return apply_args(line, [i.args for i in new_objs]) @@ -785,7 +786,7 @@ def apply_args(old_line, all_the_args): return ''.join(new_lines) -def split_amps(line, new_amps): +def split_amps(line, new_amps, gauge): if not new_amps: return '' fct = line.split('(',1)[0].split('_0')[0] @@ -841,34 +842,31 @@ def split_amps(line, new_amps): spin = fct.split(None,1)[1][to_remove] lines.append('%sP1N_%s(%s)' % (fct, to_remove+1, ', '.join(args))) - hel, iamp = re.findall('AMP\((\d+),(\d+)\)', amp_result)[0] + hel, iamp = re.findall(r'AMP\((\d+),(\d+)\)', amp_result)[0] hel_calculated.append(hel) #lines.append(' %(result)s = TMP(3) * W(3,%(w)s) + TMP(4) * W(4,%(w)s)+' # % {'result': amp_result, 'w': windex}) #lines.append(' & TMP(5) * W(5,%(w)s)+TMP(6) * W(6,%(w)s)' # % {'result': amp_result, 'w': windex}) - if spin in "VF": - lines.append(""" call CombineAmp(%(nb)i, - & (/%(hel_list)s/), - & (/%(w_list)s/), - & TMP, W, AMP(1,%(iamp)s))""" % - {'nb': len(sub_amps), - 'hel_list': ','.join(hel_calculated), - 'w_list': ','.join(windices), - 'iamp': iamp - }) + if spin == "F" or ( spin == "V" and gauge !='FD'): + suffix = '' elif spin == "S": - lines.append(""" call CombineAmpS(%(nb)i, - &(/%(hel_list)s/), - & (/%(w_list)s/), - & TMP, W, AMP(1,%(iamp)s))""" % - {'nb': len(sub_amps), - 'hel_list': ','.join(hel_calculated), - 'w_list': ','.join(windices), - 'iamp': iamp - }) + suffix = 'S' + elif spin == "V" and gauge == "FD": + suffix = "FD" else: - raise Exception("split amp are not supported for spin2 and 3/2") + raise Exception("split amp not supported for spin2, 3/2") + + lines.append(""" call CombineAmp%(suffix)s(%(nb)i, + & (/%(hel_list)s/), + & (/%(w_list)s/), + & TMP, W, AMP(1,%(iamp)s))""" % {'suffix':suffix, + 'nb': len(sub_amps), + 'hel_list': ','.join(hel_calculated), + 'w_list': ','.join(windices), + 'iamp': iamp + }) + #lines.append('') return '\n'.join(lines) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/histograms.py b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/histograms.py index 98f22c4c3a..9931127f66 100755 --- a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/histograms.py +++ b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/histograms.py @@ -632,34 +632,34 @@ class HwU(Histogram): # than necessary because the HwU standard allows for spaces from within # the name of a weight weight_header_re = re.compile( - '&\s*(?P(\S|(\s(?!\s*(&|$))))+)(\s(?!(&|$)))*') + 
r'&\s*(?P(\S|(\s(?!\s*(&|$))))+)(\s(?!(&|$)))*') # ================================ # Histo weight specification RE's # ================================ # The start of a plot - histo_start_re = re.compile('^\s*\s*(?P\d+)\s*"\s*'+ - '(?P(\S|(\s(?!\s*")))+)\s*"\s*$') + histo_start_re = re.compile(r'^\s*\s*(?P\d+)\s*"\s*'+ + r'(?P(\S|(\s(?!\s*")))+)\s*"\s*$') # A given weight specifier - a_float_re = '[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' - histo_bin_weight_re = re.compile('(?P%s|NaN)'%a_float_re,re.IGNORECASE) - a_int_re = '[\+|-]?\d+' + a_float_re = r'[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' + histo_bin_weight_re = re.compile(r'(?P%s|NaN)'%a_float_re,re.IGNORECASE) + a_int_re = r'[\+|-]?\d+' # The end of a plot histo_end_re = re.compile(r'^\s*<\\histogram>\s*$') # A scale type of weight - weight_label_scale = re.compile('^\s*mur\s*=\s*(?P%s)'%a_float_re+\ - '\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) - weight_label_PDF = re.compile('^\s*PDF\s*=\s*(?P\d+)\s*$') - weight_label_PDF_XML = re.compile('^\s*pdfset\s*=\s*(?P\d+)\s*$') - weight_label_TMS = re.compile('^\s*TMS\s*=\s*(?P%s)\s*$'%a_float_re) - weight_label_alpsfact = re.compile('^\s*alpsfact\s*=\s*(?P%s)\s*$'%a_float_re, + weight_label_scale = re.compile(r'^\s*mur\s*=\s*(?P%s)'%a_float_re+\ + r'\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) + weight_label_PDF = re.compile(r'^\s*PDF\s*=\s*(?P\d+)\s*$') + weight_label_PDF_XML = re.compile(r'^\s*pdfset\s*=\s*(?P\d+)\s*$') + weight_label_TMS = re.compile(r'^\s*TMS\s*=\s*(?P%s)\s*$'%a_float_re) + weight_label_alpsfact = re.compile(r'^\s*alpsfact\s*=\s*(?P%s)\s*$'%a_float_re, re.IGNORECASE) - weight_label_scale_adv = re.compile('^\s*dyn\s*=\s*(?P%s)'%a_int_re+\ - '\s*mur\s*=\s*(?P%s)'%a_float_re+\ - '\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) - weight_label_PDF_adv = re.compile('^\s*PDF\s*=\s*(?P\d+)\s+(?P\S+)\s*$') + weight_label_scale_adv = re.compile(r'^\s*dyn\s*=\s*(?P%s)'%a_int_re+\ + r'\s*mur\s*=\s*(?P%s)'%a_float_re+\ + r'\s*muf\s*=\s*(?P%s)\s*$'%a_float_re,re.IGNORECASE) + weight_label_PDF_adv = re.compile(r'^\s*PDF\s*=\s*(?P\d+)\s+(?P\S+)\s*$') class ParseError(MadGraph5Error): @@ -926,7 +926,7 @@ def get_HwU_source(self, print_header=True): res.append(' '.join('%+16.7e'%wgt for wgt in list(bin.boundaries))) res[-1] += ' '.join('%+16.7e'%bin.wgts[key] for key in self.bins.weight_labels if key not in ['central','stat_error']) - res.append('<\histogram>') + res.append(r'<\histogram>') return res def output(self, path=None, format='HwU', print_header=True): @@ -1149,6 +1149,8 @@ def parse_one_histo_from_stream(self, stream, all_weight_header, boundaries = [0.0,0.0] for j, weight in \ enumerate(HwU.histo_bin_weight_re.finditer(line_bin)): + if (j == len(weight_header)): + continue if j == len(all_weight_header): raise HwU.ParseError("There is more bin weights"+\ " specified than expected (%i)"%len(weight_header)) @@ -1803,7 +1805,7 @@ def parse_histos_from_PY8_XML_stream(self, stream, run_id=None, # Filter empty weights coming from the split weight_label_list = [wgt.strip() for wgt in str(selected_run_node.getAttribute('header')).split(';') if - not re.match('^\s*$',wgt)] + not re.match(r'^\s*$',wgt)] ordered_weight_label_list = [w for w in weight_label_list if w not\ in ['xmin','xmax']] # Remove potential repetition of identical weight labels @@ -1827,7 +1829,7 @@ def parse_histos_from_PY8_XML_stream(self, stream, run_id=None, all_weights = [] for wgt_position, wgt_label in \ enumerate(str(selected_run_node.getAttribute('header')).split(';')): - if not 
re.match('^\s*$',wgt_label) is None: + if not re.match(r'^\s*$',wgt_label) is None: continue all_weights.append({'POSITION':wgt_position}) for wgt_item in wgt_label.strip().split('_'): @@ -2714,7 +2716,7 @@ def ratio_no_correlations(wgtsA, wgtsB): # First the global gnuplot header for this histogram group global_header =\ -""" +r""" ################################################################################ ### Rendering of the plot titled '%(title)s' ################################################################################ @@ -2862,9 +2864,9 @@ def ratio_no_correlations(wgtsA, wgtsB): major_title = ', '.join(major_title) if not mu[0] in ['none',None]: - major_title += ', dynamical\_scale\_choice=%s'%mu[0] + major_title += r', dynamical\_scale\_choice=%s'%mu[0] if not pdf[0] in ['none',None]: - major_title += ', PDF=%s'%pdf[0].replace('_','\_') + major_title += ', PDF=%s'%pdf[0].replace('_',r'\_') # Do not show uncertainties for individual jet samples (unless first # or specified explicitely and uniquely) @@ -2937,7 +2939,7 @@ def ratio_no_correlations(wgtsA, wgtsB): plot_lines.append( "'%s' index %d using (($1+$2)/2):%d ls %d title '%s'"\ %(HwU_name,block_position+i,mu_var+3,color_index,\ -'%s dynamical\_scale\_choice=%s' % (title,mu[j]))) +r'%s dynamical\_scale\_choice=%s' % (title,mu[j]))) # And now PDF_variation if available if not PDF_var_pos is None: for j,PDF_var in enumerate(PDF_var_pos): @@ -2947,7 +2949,7 @@ def ratio_no_correlations(wgtsA, wgtsB): plot_lines.append( "'%s' index %d using (($1+$2)/2):%d ls %d title '%s'"\ %(HwU_name,block_position+i,PDF_var+3,color_index,\ -'%s PDF=%s' % (title,pdf[j].replace('_','\_')))) +'%s PDF=%s' % (title,pdf[j].replace('_',r'\_')))) # Now add the uncertainty lines, those not using a band so that they # are not covered by those using a band after we reverse plo_lines diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/launch_plugin.py index 7b10bedcef..9ec09eb71d 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/launch_plugin.py @@ -98,6 +98,7 @@ def default_setup(self): self['vector_size'] = 16 # already setup in default class (just change value) self['aloha_flag'] = '--fast-math' self['matrix_flag'] = '-O3' + self['limhel'] = 0 self.display_block.append('simd') self.display_block.append('psoptim') diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/lhe_parser.py b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/lhe_parser.py index eee9ba4522..f6e47956cd 100755 --- a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/lhe_parser.py +++ b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/lhe_parser.py @@ -7,6 +7,7 @@ import numbers import math import time +import copy import os import shutil import sys @@ -101,7 +102,7 @@ def __init__(self, line=None, event=None): self.rwgt = 0 return - + self.event = event if event is not None: self.event_id = len(event) #not yet in the event @@ -1066,6 +1067,8 @@ def define_init_banner(self, wgt, lha_strategy, proc_charac=None): #special case for 1>N init_information = run_card.get_banner_init_information() event = next(self) + if not len(event): #if parse-momenta was false we have to parse the first event + event = Event(str(event)) init_information["idbmup1"] = event[0].pdg init_information["ebmup1"] = event[0].mass init_information["idbmup2"] = 0 @@ -1149,6 +1152,7 @@ def initialize_unweighting(self, getwgt, trunc_error): nb_keep = max(20, 
int(nb_event*trunc_error*15)) new_wgt = new_wgt[-nb_keep:] if nb_event == 0: + misc.sprint(i,f) raise Exception # store the information self.initial_nb_events[i] = nb_event @@ -1203,7 +1207,6 @@ def unweight(self, outputpath, get_wgt, **opts): (stop to write event when target is reached) """ - if isinstance(get_wgt, (str,six.text_type)): unwgt_name =get_wgt def get_wgt_multi(event): @@ -1483,12 +1486,12 @@ def reorder_mother_child(self): particle.mother2 -= 1 # re-call the function for the next potential change return self.reorder_mother_child() - - - - - + + + + + def parse_reweight(self): """Parse the re-weight information in order to return a dictionary {key: value}. If no group is define group should be '' """ @@ -1522,6 +1525,37 @@ def parse_nlo_weight(self, real_type=(1,11), threshold=None): threshold=threshold) return self.nloweight + def get_fks_pair(self, real_type=(1,11), threshold=None): + """ Gives the fks pair labels""" + start, stop = self.tag.find(''), self.tag.find('') + if start != -1 != stop: + text = self.tag[start+8:stop] + all_line = text.split('\n') + text = text.lower().replace('d','e') + all_line = text.split('\n') + for line in all_line: + data = line.split() + if len(data)>16: + wgt = OneNLOWeight(line, real_type=real_type) + return wgt.to_merge_pdg,wgt.nexternal + + def get_born_momenta(self,real_type=(1,11), threshold=None): + """ Gets the underlying n+1 body kinematics""" + start, stop = self.tag.find(''), self.tag.find('') + if start != -1 != stop: + text = self.tag[start+8:stop] + text = text.lower().replace('d','e') + all_line = text.split('\n') + for line in all_line: + data = line.split() + if len(data)>16: + wgt = OneNLOWeight(line, real_type=real_type) + nexternal = wgt.nexternal + real_momenta = all_line[2:2+nexternal] + return real_momenta + + + def rewrite_nlo_weight(self, wgt=None): """get the string associate to the weight""" @@ -1558,11 +1592,11 @@ def parse_lo_weight(self): return self.loweight if not hasattr(Event, 'loweight_pattern'): - Event.loweight_pattern = re.compile('''\s*(?P\d+)\s+(?P[\d.e+-]+)\s*\s*\n\s* - \s*(?P[\s\d.+-e]+)\s*\s*\n\s* - 1|2)["']?\>\s*(?P[\s\d.e+-]*)\s*\s*\n\s* - 1|2)["']?\>\s*(?P[\s\d.e+-]*)\s*\s*\n\s* - \s*(?P[\d.e+-]*)\s* + Event.loweight_pattern = re.compile('''\\s*(?P\\d+)\\s+(?P[\\d.e+-]+)\\s*\\s*\n\\s* + \\s*(?P[\\s\\d.+-e]+)\\s*\\s*\n\\s* + 1|2)["']?\\>\\s*(?P[\\s\\d.e+-]*)\\s*\\s*\n\\s* + 1|2)["']?\\>\\s*(?P[\\s\\d.e+-]*)\\s*\\s*\n\\s* + \\s*(?P[\\d.e+-]*)\\s* ''',re.X+re.I+re.M) start, stop = self.tag.find(''), self.tag.find('') @@ -1615,7 +1649,7 @@ def parse_matching_scale(self): self.matched_scale_data = [] - pattern = re.compile("") + pattern = re.compile(r"") data = re.split(pattern,self.tag) if len(data) == 1: return [] @@ -1623,7 +1657,7 @@ def parse_matching_scale(self): tmp = {} start,content, end = data self.tag = "%s%s" % (start, end) - pattern = re.compile("pt_clust_(\d*)=\"([\de+-.]*)\"") + pattern = re.compile("pt_clust_(\\d*)=\"([\\de+-.]*)\"") for id,value in pattern.findall(content): tmp[int(id)] = float(value) for i in range(1, len(self)+1): @@ -1647,7 +1681,7 @@ def parse_syscalc_info(self): return self.syscalc_data pattern = re.compile("|") - pattern2 = re.compile("<(?P[\w]*)(?:\s*(\w*)=[\"'](.*)[\"']\s*|\s*)>(.*)") + pattern2 = re.compile("<(?P[\\w]*)(?:\\s*(\\w*)=[\"'](.*)[\"']\\s*|\\s*)>(.*)") data = re.split(pattern,self.tag) if len(data) == 1: return [] @@ -1850,6 +1884,240 @@ def get_decay(self, pdg_code=0, event_id=None): return new_event + + def set_initial_mass_to_zero(self): + 
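The set_initial_mass_to_zero method introduced here works in the partonic centre-of-mass frame: after checking that the beams are back-to-back, it sums the final-state energies and replaces both incoming momenta by massless back-to-back vectors (E/2, 0, 0, ±E/2), preserving the total energy and the beam directions. A minimal numeric sketch of the same reshuffle with plain tuples instead of the FourMomentum class:

import math

def massless_beams(final_energies, first_beam_forward=True):
    """Return two massless back-to-back beam momenta (E, px, py, pz)."""
    etot = sum(final_energies)
    sign = 1.0 if first_beam_forward else -1.0
    return ((etot / 2., 0., 0., sign * etot / 2.),
            (etot / 2., 0., 0., -sign * etot / 2.))

p1, p2 = massless_beams([60.0, 40.0])
assert math.isclose(p1[0]**2 - p1[3]**2, 0.0, abs_tol=1e-9)  # exactly massless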
"""set the masses of the initial particles to zero, by reshuffling the respective momenta + Works only in the **partonic** com frame, so the event must be boosted to such frame + before calling the function + """ + + if not misc.equal(self[0].px, 0) or not misc.equal(self[1].px, 0) or \ + not misc.equal(self[0].py, 0) or not misc.equal(self[1].py, 0) or \ + not misc.equal(self[0].pz, - self[1].pz, zero_limit=False): + misc.sprint(self[0]) + misc.sprint(self[1]) + raise Exception('momenta should be in the partonic center of mass frame') + + self[0].mass = 0. + self[1].mass = 0. + tot_E=0. + for ip,part in enumerate(self): + if part.status == 1 : + tot_E += part.E + if (self[0].pz > 0. and self[1].pz < 0): + self[0].set_momentum(FourMomentum([tot_E/2., 0., 0., tot_E/2.])) + self[1].set_momentum(FourMomentum([tot_E/2., 0., 0., -tot_E/2.])) + elif (self[0].pz < 0. and self[1].pz > 0): + self[0].set_momentum(FourMomentum([tot_E/2., 0., 0., -tot_E/2.])) + self[1].set_momentum(FourMomentum([tot_E/2., 0., 0., tot_E/2.])) + else: + logger.critical('ERROR: two incoming partons not back.-to-back') + + def set_final_jet_mass_to_zero(self): + """set the final light particle masses to zero + """ + + for ip,part in enumerate(self): + if ((abs(part.pid) <= 5) or (abs(part.pid) == 11) or (abs(part.pid) == 12)) and (part.status == 1): + part.mass = 0. + E_1_new = math.sqrt(part.mass**2 + part.px**2 + part.py**2 + part.pz**2) + part.set_momentum(FourMomentum([E_1_new, part.px, part.py, part.pz])) + + + + def merge_particles_kinematics(self, i,j, moth): + """Map to an underlying n-body kinematics for two given + particles i,j to be merged and a resulting moth""" + """ note! kinematics (and id) mapping only! """ + + recoil = True + fks_type = False + + if recoil and not fks_type: + if (i == moth[0].get('number')-1): + fks_i = i + fks_j = j + elif (j == moth[0].get('number')-1): + fks_i = j + fks_j = i + to_remove = fks_j + + merge_i = self[fks_i] + merge_j = self[fks_j] + + i_4mom = FourMomentum(merge_i) + j_4mom = FourMomentum(merge_j) + if (fks_i <= 1): + sign1 = -1.0 + else: + sign1 = 1.0 + mother_4mom = i_4mom + sign1*j_4mom + + new_event = copy.deepcopy(self) + + self[fks_i].pid = moth[0]['id'] + self[fks_i].set_momentum(mother_4mom) + + if fks_i <= 1: # initial-state recoil + new_p = FourMomentum() + for ip,part in enumerate(self): + if (ip != fks_i and ip != fks_j and ip >= 2): + new_p += part + + if fks_i == 0: + self[1].set_momentum(new_p - FourMomentum(self[0])) + elif fks_i == 1: + self[0].set_momentum(new_p - FourMomentum(self[1])) + + pz_1_new = self.recoil_eq(self[0],self[1]) + pz_2_new = self[0].pz + self[1].pz - pz_1_new + E_1_new = math.sqrt(self[0].mass**2 + self[0].px**2 + self[0].py**2 + pz_1_new **2) + E_2_new = math.sqrt(self[1].mass**2 + self[1].px**2 + self[1].py**2 + pz_2_new **2) + self[0].set_momentum(FourMomentum([E_1_new,self[0].px,self[0].py,pz_1_new])) + self[1].set_momentum(FourMomentum([E_2_new,self[1].px,self[1].py,pz_2_new])) + self.pop(to_remove) + + if fks_i > 1: # final-state recoil + + # Re-scale the energy of fks_i to make it on-shell + for ip,part in enumerate(self): + if (ip == fks_i): + part.E = math.sqrt(part.mass**2 + part.px**2 + part.py**2 + part.pz**2) + new_p.E = part.E + + # Find the overall energy in the final state + new_p.E = 0.0 + for ip,part in enumerate(self): + if (ip != fks_j and ip >= 2): + new_p.E += part.E + + # Use one of the initial states to absorb the energy change in the final state + 
+                self[1].set_momentum(FourMomentum([new_p.E-self[0].E, self[1].px, self[1].py, self[1].pz]))
+
+                # Change the initial state pz and E
+                pz_1_new = self.recoil_eq(self[0], self[1])
+                pz_2_new = self[0].pz + self[1].pz - pz_1_new
+                E_1_new = math.sqrt(self[0].mass**2 + self[0].px**2 + self[0].py**2 + pz_1_new**2)
+                E_2_new = math.sqrt(self[1].mass**2 + self[1].px**2 + self[1].py**2 + pz_2_new**2)
+                self[0].set_momentum(FourMomentum([E_1_new, self[0].px, self[0].py, pz_1_new]))
+                self[1].set_momentum(FourMomentum([E_2_new, self[1].px, self[1].py, pz_2_new]))
+                self.pop(to_remove)
+
+        elif fks_type and not recoil:
+            ## Do it in a more FKS-like way
+            if (i == moth[0].get('number')-1):
+                fks_i = i
+                fks_j = j
+            elif (j == moth[0].get('number')-1):
+                fks_i = j
+                fks_j = i
+            to_remove = fks_j
+
+            if fks_i <= 1: # initial-state recoil
+
+                # First boost to partonic CM frame
+                q = FourMomentum(self[0])+FourMomentum(self[1])
+                for ip, part in enumerate(self):
+                    vec = FourMomentum(part)
+                    self[ip].set_momentum(vec.zboost(pboost=q))
+
+                k_tot = FourMomentum([self[0].E+self[1].E-self[fks_j].E, self[0].px+self[1].px-self[fks_j].px,\
+                                      self[0].py+self[1].py-self[fks_j].py, self[0].pz+self[1].pz-self[fks_j].pz])
+
+                final = FourMomentum([0,0,0,0])
+                for ip, part in enumerate(self):
+                    vec = FourMomentum([part.E, part.px, part.py, part.pz])
+                    if (ip != fks_i and ip != fks_j and ip >= 2):
+                        final = final + vec
+
+                s = FourMomentum([self[0].E+self[1].E, self[0].px+self[1].px,\
+                                  self[0].py+self[1].py, self[0].pz+self[1].pz])**2
+                ksi = self[fks_j].E/(math.sqrt(s)/2.0)
+                y = self[fks_j].pz/self[fks_j].E
+
+                self[0].pz = self[0].pz * math.sqrt(1.0-ksi)*math.sqrt((2.0-ksi*(1.0+y))/((2.0-ksi*(1.0-y))))
+                self[0].E = math.sqrt(self[0].mass**2 + self[0].pz**2)
+                self[1].pz = self[1].pz * math.sqrt(1.0-ksi)*math.sqrt((2.0-ksi*(1.0-y))/((2.0-ksi*(1.0+y))))
+                self[1].E = math.sqrt(self[1].mass**2 + self[1].pz**2)
+
+                final = FourMomentum([self[0].E+self[1].E, self[0].px+self[1].px,\
+                                      self[0].py+self[1].py, self[0].pz+self[1].pz])
+
+                k_tot_1 = k_tot.zboost(pboost=FourMomentum([k_tot.E, k_tot.px, k_tot.py, k_tot.pz]))
+                k_tot_2 = k_tot_1.pt_boost(pboost=FourMomentum([k_tot_1.E, k_tot_1.px, k_tot_1.py, k_tot_1.pz]))
+                k_tot_3 = k_tot_2.zboost_inv(pboost=FourMomentum([k_tot.E, k_tot.px, k_tot.py, k_tot.pz]))
+
+                for ip, part in enumerate(self):
+                    if (ip >= 2):
+                        vec = FourMomentum([part.E, part.px, part.py, part.pz])
+                        vec2 = vec.zboost(pboost=FourMomentum([k_tot.E, k_tot.px, k_tot.py, k_tot.pz]))
+                        vec3 = vec2.pt_boost(pboost=FourMomentum([k_tot_1.E, k_tot_1.px, k_tot_1.py, k_tot_1.pz]))
+                        vec_new = vec3.zboost_inv(pboost=FourMomentum([k_tot.E, k_tot.px, k_tot.py, k_tot.pz]))
+                        self[ip].set_momentum(FourMomentum([vec_new.E, vec_new.px, vec_new.py, vec_new.pz]))
+
+                self.pop(to_remove)
+
+            else: # final-state recoil
+                q = FourMomentum([self[0].E+self[1].E, self[0].px+self[1].px,\
+                                  self[0].py+self[1].py, self[0].pz+self[1].pz])
+
+                for ip, part in enumerate(self):
+                    vec = FourMomentum([part.E, part.px, part.py, part.pz])
+                    self[ip].set_momentum(vec.zboost(pboost=q))
+
+                q = FourMomentum([self[0].E+self[1].E, self[0].px+self[1].px,\
+                                  self[0].py+self[1].py, self[0].pz+self[1].pz])
+
+                k = FourMomentum([self[fks_i].E+self[fks_j].E, self[fks_i].px+self[fks_j].px,\
+                                  self[fks_i].py+self[fks_j].py, self[fks_i].pz+self[fks_j].pz])
+
+                k_rec = FourMomentum([0,0,0,0])
+                for ip, part in enumerate(self):
+                    if ip >= 2 and ip != fks_i and ip != fks_j: # add only final-states to the recoil and not the FKS pair
+                        k_rec = k_rec + FourMomentum([part.E, part.px, part.py, part.pz])
+
+                k_mom = math.sqrt(k_rec.px**2 + k_rec.py**2 + k_rec.pz**2)
+                beta = (q**2 - (k_rec.E+k_mom)**2)/(q**2 + (k_rec.E+k_mom)**2)
+                for ip, part in enumerate(self):
+                    if ip >= 2 and ip != fks_i and ip != fks_j:
+                        vec = FourMomentum([self[ip].E, self[ip].px, self[ip].py, self[ip].pz])
+                        self[ip].set_momentum(vec.boost_beta(beta, k_rec))
+                    if ip == fks_i:
+                        self[ip].set_momentum(q - k_rec.boost_beta(beta, k_rec))
+                self.pop(to_remove)
+        else:
+            logger.error('Error in Sudakov Born mapping: no recoil scheme found!')
+
+    def recoil_eq(self, part1, part2):
+        """ In general, solves the system of equations
+            E1 + E2 = K
+            p1 + p2 = c
+            E1^2 - p1^2 = a
+            E2^2 - p2^2 = b
+            and returns p1
+        """
+        thresh = 1e-6
+        a = part1.mass**2 + part1.px**2 + part1.py**2
+        b = part2.mass**2 + part2.px**2 + part2.py**2
+        c = part1.pz + part2.pz
+        K = part1.E + part2.E
+        K2 = K**2
+        sol1 = (-a*c + b*c + c**3 - c*K2 - math.sqrt(K2*(a**2 + (b + c**2 - K2)**2 - 2*a*(b - c**2 + K2))))/(2*(c**2-K2))
+        sol2 = (-a*c + b*c + c**3 - c*K2 + math.sqrt(K2*(a**2 + (b + c**2 - K2)**2 - 2*a*(b - c**2 + K2))))/(2*(c**2-K2))
+
+        if abs(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2) - (math.sqrt(a+sol2**2) + math.sqrt(b+(c-sol2)**2))) > thresh:
+            logger.critical('Error in recoil_eq solver 1')
+            logger.critical(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2))
+            logger.critical(math.sqrt(a+sol2**2) + math.sqrt(b+(c-sol2)**2))
+        if abs(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2) - K) > thresh:
+            logger.critical('Error in recoil_eq solver 2')
+            logger.critical(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2))
+            logger.critical(K)
+        return sol1
+
+
     def boost(self, filter=None):
         """modify the current event to boost it according to the current filter"""
         if filter is None:
@@ -1861,7 +2129,7 @@ def boost(self, filter=None):
             if list(filter(p)):
                 pboost += p
         else:
-            pboost = FourMomentum(pboost)
+            pboost = FourMomentum(filter)
 
         # change sign of three-component due to helas convention
         pboost.px *=-1
@@ -1877,7 +2145,7 @@ def check(self):
         """check various property of the events"""
 
         # check that relative error is under control
-        threshold = 1e-6
+        threshold = 1e-4
 
         #1. Check that the 4-momenta are conserved
         E, px, py, pz = 0,0,0,0
@@ -1920,7 +2188,50 @@ def check(self):
             self.check_color_structure()
 
         #3. check mass
-
+
+    def check_kinematics_only(self):
+        """check various property of the events - only kinematics"""
+
+        # check that relative error is under control
+        threshold = 1e-3
+
+        #1. Check that the 4-momenta are conserved
+        E, px, py, pz = 0,0,0,0
+        absE, abspx, abspy, abspz = 0,0,0,0
+        for particle in self:
+            coeff = 1
+            if particle.status == -1:
+                coeff = -1
+            elif particle.status != 1:
+                continue
+            E += coeff * particle.E
+            absE += abs(particle.E)
+            px += coeff * particle.px
+            py += coeff * particle.py
+            pz += coeff * particle.pz
+            abspx += abs(particle.px)
+            abspy += abs(particle.py)
+            abspz += abs(particle.pz)
+            # check mass
+            fourmass = FourMomentum(particle).mass
+
+            if particle.mass and (abs(particle.mass) - fourmass)/ abs(particle.mass) > threshold:
+                logger.critical(self)
+                raise Exception( "Do not have correct mass lhe: %s momentum: %s (error at %s)" % (particle.mass, fourmass, (abs(particle.mass) - fourmass)/ abs(particle.mass)))
+
+        if abs(E/absE) > threshold:
+            logger.critical(self)
+            raise Exception("Do not conserve Energy %s, %s" % (E/absE, E))
+        if abs(px/abspx) > threshold:
+            logger.critical(self)
+            raise Exception("Do not conserve Px %s, %s" % (px/abspx, px))
+        if abs(py/abspy) > threshold:
+            logger.critical(self)
+            raise Exception("Do not conserve Py %s, %s" % (py/abspy, py))
+        if abs(pz/abspz) > threshold:
+            logger.critical(self)
+            raise Exception("Do not conserve Pz %s, %s" % (pz/abspz, pz))
+
     def assign_scale_line(self, line, convert=True):
         """read the line corresponding to global event line
@@ -2764,7 +3075,7 @@ def zboost(self, pboost=None, E=0, pz=0):
         if isinstance(pboost, FourMomentum):
             E = pboost.E
             pz = pboost.pz
-        
+
         #beta = pz/E
         gamma = E / math.sqrt(E**2-pz**2)
         gammabeta = pz / math.sqrt(E**2-pz**2)
@@ -2778,6 +3089,74 @@ def zboost(self, pboost=None, E=0, pz=0):
             out.pz = 0
         return out
 
+    def zboost_inv(self, pboost=None, E=0, pz=0):
+        """Both momenta should be in the same frame.
+           Inverse of zboost: undo the z boost that puts pboost at rest,
+           i.e. boost back from the pboost rest frame (only z boost applied).
+        """
+        if isinstance(pboost, FourMomentum):
+            E = pboost.E
+            pz = pboost.pz
+
+        #beta = pz/E
+        gamma = E / math.sqrt(E**2-pz**2)
+        gammabeta = pz / math.sqrt(E**2-pz**2)
+
+        out = FourMomentum([gamma*self.E + gammabeta*self.pz,
+                            self.px,
+                            self.py,
+                            gamma*self.pz + gammabeta*self.E])
+
+        if abs(out.pz) < 1e-6 * out.E:
+            out.pz = 0
+        return out
+
+
+    def pt_boost(self, pboost=None, E=0, pz=0):
+        """Both momenta should be in the same frame.
+           The boost performed corresponds to the boost required to set pboost at
+           rest (only pT boost applied).
+ """ + + if isinstance(pboost, FourMomentum): + E = pboost.E + px = pboost.px + py = pboost.py + mass = math.sqrt(E**2 - px**2 - py**2) + + betax = px/E + betay = py/E + beta = math.sqrt(betax**2+betay**2) + gamma = 1 / math.sqrt(1.0-beta**2) + + out = FourMomentum([gamma*self.E - gamma*betax*self.px - gamma*betay*self.py, + -gamma*betax*self.E + (1.0 + (gamma-1.0)*betax**2/(beta**2))*self.px + (gamma-1.0)*betax*betay/(beta**2)*self.py, + -gamma*betay*self.E + ((gamma-1.0)*betax*betay/(beta**2))*self.px + (1.0+(gamma-1.0)*(betay**2)/(beta**2))*self.py, + self.pz]) + + if abs(out.px) < 1e-6 * out.E: + out.px = 0 + if abs(out.py) < 1e-6 * out.E: + out.py = 0 + return out + + def boost_beta(self,beta,mom): + """ Boost along the three-momentum of mom with a boost of size beta""" + + unit = mom * (1.0/math.sqrt(mom.px**2+mom.py**2+mom.pz**2)) + beta_vec = beta*unit + bx = beta_vec.px + by = beta_vec.py + bz = beta_vec.pz + gamma = 1.0 / math.sqrt(1.0-beta**2) + + out = FourMomentum([gamma*self.E - gamma*bx*self.px - gamma*by*self.py - gamma*bz*self.pz, + -gamma*bx*self.E + (1.0 + (gamma-1.0)*bx**2/(beta**2))*self.px + (gamma-1.0)*bx*by/(beta**2)*self.py + (gamma-1.0)*bx*bz/(beta**2)*self.pz, + -gamma*by*self.E + ((gamma-1.0)*bx*by/(beta**2))*self.px + (1.0+(gamma-1.0)*(by**2)/(beta**2))*self.py + (gamma-1.0)*by*bz/(beta**2)*self.pz, + -gamma*bz*self.E + (gamma-1.0)*bx*bz/(beta**2)*self.px + (gamma-1.0)*(by*bz)/(beta**2)*self.py + (1.0+(gamma-1.0)*bz**2/(beta**2))*self.pz]) + + return out + def boost_to_restframe(self, pboost): """apply the boost transformation such that pboost is at rest in the new frame. First apply a rotation to allign the pboost to the z axis and then use @@ -2789,27 +3168,64 @@ def boost_to_restframe(self, pboost): return out - # write pboost as (E, p cosT sinF, p sinT sinF, p cosF) - # rotation such that it become (E, 0 , 0 , p ) is - # cosT sinF , -sinT , cosT sinF - # sinT cosF , cosT , sinT sinF - # -sinT , 0 , cosF - p = math.sqrt( pboost.px**2 + pboost.py**2+ pboost.pz**2) - cosF = pboost.pz / p - sinF = math.sqrt(1-cosF**2) - sinT = pboost.py/p/sinF - cosT = pboost.px/p/sinF - - out=FourMomentum([self.E, - self.px*cosT*cosF + self.py*sinT*cosF-self.pz*sinF, - -self.px*sinT+ self.py*cosT, - self.px*cosT*sinF + self.py*sinT*sinF + self.pz*cosF - ]) - out = out.zboost(E=pboost.E,pz=p) + # see here https://physics.stackexchange.com/questions/749036/general-lorentz-boost-of-four-momentum-in-cm-frame-particle-physics + vx = pboost.px/pboost.E + vy = pboost.py/pboost.E + vz = pboost.pz/pboost.E + v = pboost.norm/pboost.E + v2 = pboost.norm_sq/pboost.E**2 + gamma = 1./math.sqrt(1.-v**2) + gammo = gamma-1. 
+ out = FourMomentum(E = gamma*(self.E - vx*self.px - vy*self.py - vz*self.pz), + px= -gamma*vx*self.E + (1+gammo*vx**2/v2)*self.px + gammo*vx*vy/v2*self.py + gammo*vx*vz/v2*self.pz, + py= -gamma*vy*self.E + gammo*vy*vx/v2*self.px + (1+gammo*vy**2/v2)*self.py + gammo*vy*vz/v2*self.pz, + pz= -gamma*vz*self.E + gammo*vz*vx/v2*self.px + gammo*vz*vy/v2*self.py + (1+gammo*vz**2/v2)*self.pz) + return out + def rotate_to_z(self,prot): + + import math + import numpy as np + + z = np.array([0.,0.,1.]) + + px = self.px + py = self.py + pz = self.pz + + refx = prot.px + refy = prot.py + refz = prot.pz + + prot_mom = np.array([px, py, pz]) + ref_mom = np.array([refx, refy, refz]) + + # Create normal vector + n = np.array([refy, -refx, 0.]) + n = n * 1./math.sqrt(self.threedot(n,n)) + t = prot_mom - self.threedot(n,prot_mom)*n + p = ref_mom - self.threedot(ref_mom,z)*z + p = p/math.sqrt(self.threedot(p,p)) + + t_pz = np.array([self.threedot(t,p), self.threedot(t,z), 0.]) + costheta = self.threedot(ref_mom,z)* 1./math.sqrt(self.threedot(ref_mom, ref_mom)) + sintheta=math.sqrt(1.-costheta**2) + + sgn = 1. + t_pz_p = np.array([0., 0., 0.]) + t_pz_p[0] = costheta*t_pz[0] + sgn*(-sintheta) * t_pz[1] + t_pz_p[1] = sgn*sintheta*t_pz[0] + costheta * t_pz[1] + + out_mom = self.threedot(n,prot_mom)*n + t_pz_p[0]*p + t_pz_p[1]*z + + out = FourMomentum([self.E,out_mom[0], out_mom[1], out_mom[2] ] ) + + return out - + def threedot(self,a,b): + + return a[0]*b[0]+a[1]*b[1]+a[2]*b[2] class OneNLOWeight(object): diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/madevent_interface.py b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/madevent_interface.py index 2a118e21bf..8e30cf690c 100755 --- a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/madevent_interface.py +++ b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/madevent_interface.py @@ -496,7 +496,6 @@ def help_remove(self): logger.info(" the optional '-f' allows to by-pass all security question") logger.info(" The banner can be remove only if all files are removed first.") - class AskRun(cmd.ControlSwitch): """a class for the question on what to do on a madevent run""" @@ -2393,13 +2392,17 @@ def do_generate_events(self, line): # Check argument's validity mode = self.check_generate_events(args) switch_mode = self.ask_run_configuration(mode, args) - if not args: - # No run name assigned -> assigned one automaticaly - self.set_run_name(self.find_available_run_name(self.me_dir), None, 'parton') - else: - self.set_run_name(args[0], None, 'parton', True) - args.pop(0) - + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + run_card = banner_mod.RunCard(pjoin(self.me_dir, 'Cards', 'run_card.dat'), consistency=False) + if not run_card.scan_set: + if not args: + # No run name assigned -> assigned one automaticaly + self.set_run_name(self.find_available_run_name(self.me_dir), None, 'parton') + else: + self.set_run_name(args[0], None, 'parton', True) + args.pop(0) + + self.run_generate_events(switch_mode, args) self.postprocessing() @@ -2560,7 +2563,7 @@ def wait_monitoring(Idle, Running, Done): self.update_status("postprocessing contur done", level="rivet") # this decorator handle the loop related to scan. 
-    @common_run.scanparamcardhandling()
+    @common_run.scanparamcardhandling(run_card_scan=True)
     def run_generate_events(self, switch_mode, args):
 
         if self.proc_characteristics['loop_induced'] and self.options['run_mode']==0:
@@ -2593,7 +2596,6 @@ def run_generate_events(self, switch_mode, args):
             # Regular run mode
             logger.info('Generating %s events with run name %s' %
                         (self.run_card['nevents'], self.run_name))
-
             self.exec_cmd('survey %s %s' % (self.run_name,' '.join(args)),
                           postcmd=False)
             nb_event = self.run_card['nevents']
@@ -2975,7 +2977,7 @@ def update_width_in_param_card(decay_info, initial=None, output=None):
                 particle = 0
             # Read BRs for this decay
             line = param_card[line_number]
-            while re.search('^(#|\s|\d)', line):
+            while re.search(r'^(#|\s|\d)', line):
                 line = param_card.pop(line_number)
                 if not particle or line.startswith('#'):
                     line=param_card[line_number]
@@ -3226,7 +3228,7 @@ def do_treatcards(self, line, mode=None, opt=None):
             for line in open(pjoin(bias_module_path,'%s.f'%os.path.basename(bias_module_path))):
                 if start and last:
                     break
-                if not start and not re.search('c\s*parameters\s*=\s*{',line, re.I):
+                if not start and not re.search(r'c\s*parameters\s*=\s*{',line, re.I):
                     continue
                 start = True
                 if not line.startswith('C'):
@@ -3235,7 +3237,7 @@ def do_treatcards(self, line, mode=None, opt=None):
                 if '{' in line:
                     line = line.split('{')[-1]
                 # split for } ! #
-                split_result = re.split('(\}|!|\#)', line,1, re.M)
+                split_result = re.split(r'(\}|!|\#)', line,1, re.M)
                 line = split_result[0]
                 sep = split_result[1] if len(split_result)>1 else None
                 if sep == '}':
@@ -3514,8 +3516,8 @@ def pass_in_difficult_integration_mode(self, rate=1):
         text = open(conf_path).read()
         min_evt, max_evt = 2500 *(2+rate), 10000*(rate+1)
-        text = re.sub('''\(min_events = \d+\)''', '(min_events = %i )' % min_evt, text)
-        text = re.sub('''\(max_events = \d+\)''', '(max_events = %i )' % max_evt, text)
+        text = re.sub(r'''\(min_events = \d+\)''', '(min_events = %i )' % min_evt, text)
+        text = re.sub(r'''\(max_events = \d+\)''', '(max_events = %i )' % max_evt, text)
         fsock = open(conf_path, 'w')
         fsock.write(text)
         fsock.close()
@@ -3619,7 +3621,7 @@ def do_refine(self, line):
            alljobs = misc.glob('ajob*', Pdir)
 
            #remove associated results.dat (ensure to not mix with all data)
-           Gre = re.compile("\s*j=(G[\d\.\w]+)")
+           Gre = re.compile(r"\s*j=(G[\d\.\w]+)")
 
            for job in alljobs:
                Gdirs = Gre.findall(open(job).read())
                for Gdir in Gdirs:
@@ -3727,58 +3729,126 @@ def do_combine_events(self, line):
         sum_xsec, sum_xerru, sum_axsec = 0,[],0
         Gdirs = self.get_Gdir()
         Gdirs.sort()
-        for Gdir in Gdirs:
-            if os.path.exists(pjoin(Gdir, 'events.lhe')):
-                result = sum_html.OneResult('')
-                result.read_results(pjoin(Gdir, 'results.dat'))
-                sum_xsec += result.get('xsec')
-                sum_xerru.append(result.get('xerru'))
-                sum_axsec += result.get('axsec')
-
-                if self.run_card['gridpack'] or self.run_card['nevents']==0:
-                    os.remove(pjoin(Gdir, 'events.lhe'))
-                    continue
+        partials_info = []
+        try:
+            # "ulimit" is a shell builtin, so it has to go through a shell
+            p = subprocess.Popen("ulimit -n", shell=True, stdout=subprocess.PIPE)
+            out, err = p.communicate()
+            max_G = out.decode().strip()
+            if max_G == "unlimited":
+                max_G = 2500
+            else:
+                max_G = int(max_G) - 40
+        except Exception as error:
+            logger.debug(error)
+            max_G = 80 # max(20, len(Gdirs)/self.options['nb_core'])
-
-                AllEvent.add(pjoin(Gdir, 'events.lhe'),
-                             result.get('xsec'),
-                             result.get('xerru'),
-                             result.get('axsec')
-                             )
-
-                if len(AllEvent) >= 80: #perform a partial unweighting
-                    AllEvent.unweight(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % partials),
-                                      get_wgt, log_level=5, trunc_error=1e-2, event_target=self.run_card['nevents'])
-                    AllEvent = lhe_parser.MultiEventFile()
-                    AllEvent.banner = self.banner
-                    AllEvent.add(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % partials),
-                                 sum_xsec,
-                                 math.sqrt(sum(x**2 for x in sum_xerru)),
-                                 sum_axsec)
-                    partials +=1
 
         if not hasattr(self,'proc_characteristic'):
             self.proc_characteristic = self.get_characteristics()
-        if len(AllEvent) == 0:
-            nb_event = 0
-        else:
+        mycluster = cluster.MultiCore(nb_core=self.options['nb_core'])
+
+        def split(a, n):
+            """split a list "a" into n chunks of the same size (or nearly the same size)"""
+            k, m = divmod(len(a), n)
+            return (a[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n))
+
+        if len(Gdirs) >= max_G:
+            # first check in how many chunks we have to split (always use a multiple of nb_core)
+            nb_split = 1
+            nb_G = len(Gdirs) // (2* self.options['nb_core'])
+            while nb_G > min(80, max_G):
+                nb_split += 1
+                nb_G = len(Gdirs)//(nb_split*2*self.options['nb_core'])
+            if nb_G < 10:
+                nb_split -= 1
+                nb_G = len(Gdirs)//(nb_split*2*self.options['nb_core'])
+
+            #enforce at least 10 directories per thread
+            if nb_G > 10 or nb_split>1:
+                # do the unweighting of each chunk on its own thread
+                nb_chunk = (nb_split*2*self.options['nb_core'])
+            else:
+                nb_chunk = len(Gdirs) // 10
+                nb_G = 10
+
+            # safety check in case the number of chunks is still too large
+            if nb_chunk >= max_G:
+                nb_chunk = max_G - 1
+                nb_G = len(Gdirs) // nb_chunk
+
+            for i, local_G in enumerate(split(Gdirs, nb_chunk)):
+                line = [pjoin(self.me_dir, "Events", self.run_name, "partials%d.lhe.gz" % i)]
+                line.append(pjoin(self.me_dir, 'Events', self.run_name, '%s_%s_banner.txt' % (self.run_name, tag)))
+                line.append(str(self.results.current['cross']))
+                line += local_G
+                partials_info.append(self.do_combine_events_partial(' '.join(line), preprocess_only=True))
+                mycluster.submit(sys.executable,
+                                 [pjoin(self.me_dir, 'bin', 'internal', 'madevent_interface.py'), 'combine_events_partial'] + line,
+                                 stdout='/dev/null'
+                                 )
+
+            starttime = time.time()
+            update_status = lambda idle, run, finish: \
+                self.update_status((idle, run, finish, 'unweight'), level=None,
+                                   force=False, starttime=starttime)
+            mycluster.wait(self.me_dir, update_status)
+            # do the final combination
+            for data in partials_info:
+                AllEvent.add(*data)
+
             nb_event = AllEvent.unweight(pjoin(self.me_dir, "Events", self.run_name,
"unweighted_events.lhe.gz"), + get_wgt, trunc_error=1e-2, event_target=self.run_card['nevents'], + log_level=logging.DEBUG, normalization=self.run_card['event_norm'], + proc_charac=self.proc_characteristic) + + if nb_event < self.run_card['nevents']: + logger.warning("failed to generate enough events. Please follow one of the following suggestions to fix the issue:") + logger.warning(" - set in the run_card.dat 'sde_strategy' to %s", 1 + self.run_card['sde_strategy'] % 2) + logger.warning(" - set in the run_card.dat 'hard_survey' to 1 or 2.") + logger.warning(" - reduce the number of requested events (if set too high)") + logger.warning(" - check that you do not have -integrable- singularity in your amplitude.") + logger.warning(" - regenerate your process directory by selecting another gauge (in particular try FD gauge).") - if nb_event < self.run_card['nevents']: - logger.warning("failed to generate enough events. Please follow one of the following suggestions to fix the issue:") - logger.warning(" - set in the run_card.dat 'sde_strategy' to %s", 1 + self.run_card['sde_strategy'] % 2) - logger.warning(" - set in the run_card.dat 'hard_survey' to 1 or 2.") - logger.warning(" - reduce the number of requested events (if set too high)") - logger.warning(" - check that you do not have -integrable- singularity in your amplitude.") - if partials: - for i in range(partials): - try: - os.remove(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % i)) - except Exception: - os.remove(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe" % i)) self.results.add_detail('nb_event', nb_event) @@ -3789,7 +3859,50 @@ def do_combine_events(self, line): logger.info("combination of events done in %s s ", time.time()-start) self.to_store.append('event') + + ############################################################################ + def do_combine_events_partial(self, line, preprocess_only=False): + """ """ + + AllEvent = lhe_parser.MultiEventFile() + + sum_xsec, sum_xerru, sum_axsec = 0,[],0 + output, banner_path,cross = line.split()[:3] + Gdirs = line.split()[3:] + + cross = float(cross) + if not self.banner: + self.banner = banner_mod.Banner(banner_path) + if not hasattr(self, 'run_card'): + self.run_card = banner_mod.RunCard(self.banner['mgruncard']) + AllEvent.banner = self.banner + + for Gdir in Gdirs: + if os.path.exists(pjoin(Gdir, 'events.lhe')): + result = sum_html.OneResult('') + result.read_results(pjoin(Gdir, 'results.dat')) + sum_xsec += result.get('xsec') + sum_xerru.append(result.get('xerru')) + sum_axsec += result.get('axsec') + + if self.run_card['gridpack'] or self.run_card['nevents']==0: + os.remove(pjoin(Gdir, 'events.lhe')) + continue + if not preprocess_only: + AllEvent.add(pjoin(Gdir, 'events.lhe'), + result.get('xsec'), + result.get('xerru'), + result.get('axsec') + ) + if preprocess_only: + return output, sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), sum_axsec + nb_event = max(min(abs(1.01*self.run_card['nevents']*sum_axsec/cross),self.run_card['nevents']), 10) + get_wgt = lambda event: event.wgt + AllEvent.unweight(output, + get_wgt, log_level=5, trunc_error=1e-2, event_target=nb_event) + return output, sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), sum_axsec + ############################################################################ def correct_bias(self): """check the first event and correct the weight by the bias @@ -3902,13 +4015,19 @@ def do_store_events(self, line): #except Exception: # continue # Store log - try: - if 
-                if os.path.exists(pjoin(G_path, 'log.txt')):
-                    input = pjoin(G_path, 'log.txt')
+            input = pjoin(G_path, 'log.txt')
+            if os.path.exists(input):
+                if self.run_card['keep_log'] not in ["none", "minimal"]:
                     output = pjoin(G_path, '%s_log.txt' % run)
-                    files.mv(input, output)
-            except Exception:
-                continue
+                    try:
+                        files.mv(input, output)
+                    except Exception:
+                        continue
+                elif self.run_card['keep_log'] == "none":
+                    try:
+                        os.remove(input)
+                    except Exception:
+                        continue
             #try:
             #    # Grid
             #    for name in ['ftn26']:
@@ -3989,7 +4108,7 @@ def do_create_gridpack(self, line):
         misc.call(['./bin/internal/make_gridpack'], cwd=self.me_dir)
         files.mv(pjoin(self.me_dir, 'gridpack.tar.gz'),
                  pjoin(self.me_dir, '%s_gridpack.tar.gz' % self.run_name))
-        os.system("sed -i.bak \"s/\s*.true.*=.*GridRun/ .false. = GridRun/g\" %s/Cards/grid_card.dat" \
+        os.system("sed -i.bak \"s/\\s*.true.*=.*GridRun/ .false. = GridRun/g\" %s/Cards/grid_card.dat" \
                   % self.me_dir)
         self.update_status('gridpack created', level='gridpack')
@@ -4476,7 +4595,7 @@ def do_pythia8(self, line):
             else:
                 preamble = misc.get_HEPTools_location_setter(
                     pjoin(MG5DIR,'HEPTools'),'lib')
-            preamble += "\n unset PYTHIA8DATA\n"
+            #preamble += "\n unset PYTHIA8DATA\n"
 
         open(pythia_cmd_card,'w').write("""!
! It is possible to run this card manually with:
@@ -4691,7 +4810,7 @@ def do_pythia8(self, line):
                 # Make sure to use the number of split_events determined during the splitting.
                 split_PY8_Card.systemSet('Main:numberOfEvents',partition_for_PY8[i])
                 split_PY8_Card.systemSet('HEPMCoutput:scaling',split_PY8_Card['HEPMCoutput:scaling']*
-                                         (float(partition_for_PY8[i])/float(n_events)))
+                                         (float(partition_for_PY8[i])))
                 # Add_missing set to False so as to be sure not to add any additional parameter w.r.t
                 # the ones in the original PY8 param_card copied.
                 split_PY8_Card.write(pjoin(parallelization_dir,'PY8Card_%d.dat'%i),
@@ -4963,9 +5082,9 @@ def wait_monitoring(Idle, Running, Done):
         if cross_sections:
             # Filter the cross_sections specified and keep only the ones
             # with central parameters and a different merging scale
-            a_float_re = '[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?'
+            a_float_re = r'[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?'
             central_merging_re = re.compile(
-                '^\s*Weight_MERGING\s*=\s*(?P<merging>%s)\s*$'%a_float_re,
+                r'^\s*Weight_MERGING\s*=\s*(?P<merging>%s)\s*$'%a_float_re,
                 re.IGNORECASE)
             cross_sections = dict(
                 (float(central_merging_re.match(xsec).group('merging')),value)
@@ -5016,8 +5135,8 @@ def wait_monitoring(Idle, Running, Done):
     def parse_PY8_log_file(self, log_file_path):
         """ Parse a log file to extract number of events and cross-section.
""" - pythiare = re.compile("Les Houches User Process\(es\)\s*\d+\s*\|\s*(?P\d+)\s*(?P\d+)\s*(?P\d+)\s*\|\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") - pythia_xsec_re = re.compile("Inclusive cross section\s*:\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") + pythiare = re.compile(r"Les Houches User Process\(es\)\s*\d+\s*\|\s*(?P\d+)\s*(?P\d+)\s*(?P\d+)\s*\|\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") + pythia_xsec_re = re.compile(r"Inclusive cross section\s*:\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") sigma_m, Nacc, Ntry = None, None, None for line in misc.BackRead(log_file_path): info = pythiare.search(line) @@ -5158,7 +5277,7 @@ def do_pythia(self, line): # read the line from the bottom of the file #pythia_log = misc.BackRead(pjoin(self.me_dir,'Events', self.run_name, # '%s_pythia.log' % tag)) - pythiare = re.compile("\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") + pythiare = re.compile(r"\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") for line in misc.reverse_readline(pjoin(self.me_dir,'Events', self.run_name, '%s_pythia.log' % tag)): info = pythiare.search(line) @@ -5619,8 +5738,8 @@ def launch_job(self,exe, cwd=None, stdout=None, argument = [], remaining=0, input_files.append(self.get_pdf_input_filename()) #Find the correct ajob - Gre = re.compile("\s*j=(G[\d\.\w]+)") - origre = re.compile("grid_directory=(G[\d\.\w]+)") + Gre = re.compile(r"\s*j=(G[\d\.\w]+)") + origre = re.compile(r"grid_directory=(G[\d\.\w]+)") try : fsock = open(exe) except Exception: @@ -5628,7 +5747,7 @@ def launch_job(self,exe, cwd=None, stdout=None, argument = [], remaining=0, text = fsock.read() output_files = Gre.findall(text) if not output_files: - Ire = re.compile("for i in ([\d\.\s]*) ; do") + Ire = re.compile(r"for i in ([\d\.\s]*) ; do") data = Ire.findall(text) data = ' '.join(data).split() for nb in data: @@ -6035,7 +6154,7 @@ def get_last_tag(self, level): 'syscalc':[], 'rivet':['rivet']} - if name == self.run_name: + if name and name == self.run_name: if reload_card: run_card = pjoin(self.me_dir, 'Cards','run_card.dat') self.run_card = banner_mod.RunCard(run_card) @@ -6334,7 +6453,7 @@ def run_syscalc(self, mode='parton', event_path=None, output=None): elif mode == 'Pythia': stdout = open(pjoin(event_dir, self.run_name, '%s_%s_syscalc.log' % (tag,mode)),'w') if 'mgpythiacard' in self.banner: - pat = re.compile('''^\s*qcut\s*=\s*([\+\-\d.e]*)''', re.M+re.I) + pat = re.compile(r'''^\s*qcut\s*=\s*([\+\-\d.e]*)''', re.M+re.I) data = pat.search(self.banner['mgpythiacard']) if data: qcut = float(data.group(1)) @@ -6611,7 +6730,7 @@ def get_subP_ids(path): for line in open(pjoin(path, 'leshouche.inc')): if not 'IDUP' in line: continue - particles = re.search("/([\d,-]+)/", line) + particles = re.search(r"/([\d,-]+)/", line) all_ids.append([int(p) for p in particles.group(1).split(',')]) return all_ids @@ -6899,6 +7018,7 @@ def do_combine_events(self, line): partials = 0 # if too many file make some partial unweighting sum_xsec, sum_xerru, sum_axsec = 0,[],0 + partials_info = [] Gdirs = self.get_Gdir() Gdirs.sort() for Gdir in Gdirs: @@ -6917,16 +7037,21 @@ def do_combine_events(self, line): sum_axsec += result.get('axsec')*gscalefact[Gdir] if len(AllEvent) >= 80: #perform a partial unweighting + nb_event = min(abs(1.01*self.nb_event*sum_axsec/self.results.current['cross']),self.run_card['nevents']) AllEvent.unweight(pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), - get_wgt, log_level=5, trunc_error=1e-2, event_target=self.nb_event) + 
get_wgt, log_level=5, trunc_error=1e-2, event_target=nb_event)
                 AllEvent = lhe_parser.MultiEventFile()
                 AllEvent.banner = self.banner
-                AllEvent.add(pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials),
+                partials_info.append((pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials),
                              sum_xsec,
                              math.sqrt(sum(x**2 for x in sum_xerru)),
-                             sum_axsec)
+                             sum_axsec) )
+                sum_xsec, sum_xerru, sum_axsec = 0,[],0
                 partials +=1
 
+        for data in partials_info:
+            AllEvent.add(*data)
+
         if not hasattr(self,'proc_characteristic'):
             self.proc_characteristic = self.get_characteristics()
diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/misc.py b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/misc.py
index c4c669f36b..e7fd60be0d 100755
--- a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/misc.py
+++ b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/misc.py
@@ -67,7 +67,7 @@ def parse_info_str(fsock):
     """
 
     info_dict = {}
-    pattern = re.compile("(?P\w*)\s*=\s*(?P.*)",
+    pattern = re.compile(r"(?P\w*)\s*=\s*(?P.*)",
                          re.IGNORECASE | re.VERBOSE)
     for entry in fsock:
         entry = entry.strip()
@@ -84,7 +84,7 @@ def parse_info_str(fsock):
 def glob(name, path=''):
     """call to glob.glob with automatic security on path"""
     import glob as glob_module
-    path = re.sub('(?P\?|\*|\[|\])', '[\g]', path)
+    path = re.sub(r'(?P\?|\*|\[|\])', r'[\g]', path)
     return glob_module.glob(pjoin(path, name))
 
#===============================================================================
@@ -614,10 +614,10 @@ def mod_compilator(directory, new='gfortran', current=None, compiler_type='gfort
     #search file
     file_to_change=find_makefile_in_dir(directory)
     if compiler_type == 'gfortran':
-        comp_re = re.compile('^(\s*)FC\s*=\s*(.+)\s*$')
+        comp_re = re.compile(r'^(\s*)FC\s*=\s*(.+)\s*$')
         var = 'FC'
     elif compiler_type == 'cpp':
-        comp_re = re.compile('^(\s*)CXX\s*=\s*(.+)\s*$')
+        comp_re = re.compile(r'^(\s*)CXX\s*=\s*(.+)\s*$')
         var = 'CXX'
     else:
         MadGraph5Error, 'Unknown compiler type: %s' % compiler_type
@@ -861,9 +861,9 @@ def detect_current_compiler(path, compiler_type='fortran'):
     #    comp = re.compile("^\s*FC\s*=\s*(\w+)\s*")
     # The regular expression below allows for compiler definition with absolute path
     if compiler_type == 'fortran':
-        comp = re.compile("^\s*FC\s*=\s*([\w\/\\.\-]+)\s*")
+        comp = re.compile("^\\s*FC\\s*=\\s*([\\w\\/\\.\\-]+)\\s*")
     elif compiler_type == 'cpp':
-        comp = re.compile("^\s*CXX\s*=\s*([\w\/\\.\-]+)\s*")
+        comp = re.compile("^\\s*CXX\\s*=\\s*([\\w\\/\\.\\-]+)\\s*")
     else:
         MadGraph5Error, 'Unknown compiler type: %s' % compiler_type
@@ -1001,7 +1001,19 @@ def call_stdout(arg, *args, **opt):
 def copytree(src, dst, symlinks = False, ignore = None):
     if not os.path.exists(dst):
         os.makedirs(dst)
-    shutil.copystat(src, dst)
+    try:
+        shutil.copystat(src, dst)
+    except PermissionError:
+        if os.path.realpath(src).startswith('/cvmfs') and os.path.realpath(dst).startswith('/afs'):
+            # allow the permission mismatch from cvmfs to afs since it does not seem to create issues --at least in general--
+            logger.critical(f'Ignoring that we could not copy permissions from {src} to {dst}')
+        else:
+            logger.critical(f'Permission error detected from {src} to {dst}.\n'+\
+                            'If you are using WSL with a Windows partition, please try using python3.12\n'+\
+                            'or avoid moving your data from the WSL partition to the UNIX one')
+            # we do not have enough experience with WSL to let this go through.
+            raise
+
     lst = os.listdir(src)
     if ignore:
         excl = ignore(src, lst)
@@ -1895,12 +1907,12 @@ class EasterEgg(object):
 
    May4_banner = "* _____ *\n" + \
                  "* ,-~\" \"~-. *\n" + \
*\n" + \ "* * ,^ ___ ^. * *\n" + \ - "* * / .^ ^. \ * *\n" + \ + "* * / .^ ^. \\ * *\n" + \ "* * Y l o ! Y * *\n" + \ "* * l_ `.___.' _,[ * *\n" + \ "* * |^~\"--------------~\"\"^| * *\n" + \ "* * ! May the 4th ! * *\n" + \ - "* * \ / * *\n" + \ + "* * \\ / * *\n" + \ "* * ^. .^ * *\n" + \ "* * \"-.._____.,-\" * *\n" @@ -1909,13 +1921,13 @@ class EasterEgg(object): "* M::::::::::M M::::::::::M *\n" + \ "* M:::::::::::M M:::::::::::M (_)___ *\n" + \ "* M:::::::M::::M M::::M:::::::M | / __| *\n" + \ - "* M::::::M M::::M M::::M M::::::M | \__ \ *\n" + \ + "* M::::::M M::::M M::::M M::::::M | \\__ \\ *\n" + \ "* M::::::M M::::M::::M M::::::M |_|___/ *\n" + \ "* M::::::M M:::::::M M::::::M *\n" + \ "* M::::::M M:::::M M::::::M / _| ___ _ __ *\n" + \ - "* M::::::M MMMMM M::::::M | |_ / _ \| '__| *\n" + \ + "* M::::::M MMMMM M::::::M | |_ / _ \\| '__| *\n" + \ "* M::::::M M::::::M | _| (_) | | *\n" + \ - "* M::::::M M::::::M |_/\/\___/|_| *\n" + \ + "* M::::::M M::::::M |_/\\/\\___/|_| *\n" + \ "* M::::::M M::::::M *\n" + \ "* MMMMMMMM MMMMMMMM *\n" + \ "* *\n" + \ @@ -2233,39 +2245,51 @@ def import_python_lhapdf(lhapdfconfig): os.environ['LD_LIBRARY_PATH'] = lhapdf_libdir else: os.environ['LD_LIBRARY_PATH'] = '%s:%s' %(lhapdf_libdir,os.environ['LD_LIBRARY_PATH']) - try: candidates=[dirname for dirname in os.listdir(lhapdf_libdir) \ if os.path.isdir(os.path.join(lhapdf_libdir,dirname))] except OSError: candidates=[] + if os.path.isdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')): + candidates += [pjoin(os.pardir,'local', 'lib', dirname) for dirname in os.listdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')) + if os.path.isdir(os.path.join(lhapdf_libdir,os.pardir, 'local', 'lib', dirname))] for candidate in candidates: - if os.path.isdir(os.path.join(lhapdf_libdir,candidate,'site-packages')): - sys.path.insert(0,os.path.join(lhapdf_libdir,candidate,'site-packages')) - try: - import lhapdf - use_lhapdf=True - break - except ImportError: - sys.path.pop(0) - continue + for subdir in ['site-packages', 'dist-packages']: + if os.path.isdir(os.path.join(lhapdf_libdir,candidate, subdir)): + sys.path.insert(0,os.path.join(lhapdf_libdir,candidate, subdir)) + try: + import lhapdf + use_lhapdf=True + break + except ImportError as error: + sys.path.pop(0) + continue + else: + continue + break if not use_lhapdf: try: candidates=[dirname for dirname in os.listdir(lhapdf_libdir+'64') \ if os.path.isdir(os.path.join(lhapdf_libdir+'64',dirname))] except OSError: candidates=[] - + if os.path.isdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib64')): + candidates += [pjoin(os.pardir,'local', 'lib64', dirname) for dirname in os.listdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')) + if os.path.isdir(os.path.join(lhapdf_libdir,os.pardir, 'local', 'lib64', dirname))] for candidate in candidates: - if os.path.isdir(os.path.join(lhapdf_libdir+'64',candidate,'site-packages')): - sys.path.insert(0,os.path.join(lhapdf_libdir+'64',candidate,'site-packages')) - try: - import lhapdf - use_lhapdf=True - break - except ImportError: - sys.path.pop(0) - continue + for subdir in ['site-packages', 'dist-packages']: + if os.path.isdir(os.path.join(lhapdf_libdir+'64',candidate, subdir)): + sys.path.insert(0,os.path.join(lhapdf_libdir+'64',candidate, subdir)) + try: + import lhapdf + use_lhapdf=True + break + except ImportError as error: + sys.path.pop(0) + continue + else: + continue + break if not use_lhapdf: try: import lhapdf diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/shower_card.py 
b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/shower_card.py index e87d534177..16ed72b1a0 100755 --- a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/shower_card.py +++ b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/shower_card.py @@ -288,7 +288,7 @@ def write(self, output_file, template=None, python_template=False, self.text = open(template,'r').read() - key_re = re.compile('^(\s*)([\S^#]+)(\s*)=(\s*)([^#]*?)(\s*)(\#.*|$)') + key_re = re.compile(r'^(\s*)([\S^#]+)(\s*)=(\s*)([^#]*?)(\s*)(\#.*|$)') newlines = [] for line in self.text.split('\n'): key_match = key_re.findall(line) diff --git a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/systematics.py b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/systematics.py index 28eaed00e2..2acebd087c 100644 --- a/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/systematics.py +++ b/epochX/cudacpp/susy_gg_t1t1.mad/bin/internal/systematics.py @@ -582,7 +582,7 @@ def print_cross_sections(self, all_cross, nb_event, stdout): else: resume.write( '#PDF %s: %g +%2.3g%% -%2.3g%%\n' % (pdfset.name, pdferr.central,pdferr.errplus*100/all_cross[0], pdferr.errminus*100/all_cross[0])) - dyn_name = {1: '\sum ET', 2:'\sum\sqrt{m^2+pt^2}', 3:'0.5 \sum\sqrt{m^2+pt^2}',4:'\sqrt{\hat s}' } + dyn_name = {1: r'\sum ET', 2:r'\sum\sqrt{m^2+pt^2}', 3:r'0.5 \sum\sqrt{m^2+pt^2}',4:r'\sqrt{\hat s}' } for key, curr in dyns.items(): if key ==-1: continue @@ -789,7 +789,7 @@ def get_id(self): return int(self.start_wgt_id) if 'initrwgt' in self.banner: - pattern = re.compile(' set lhapdf /PATH/TO/lhapdf-config -None does not seem to correspond to a valid lhapdf-config executable. -Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). -Note that you can still compile and run aMC@NLO with the built-in PDFs - MG5_aMC> set lhapdf /PATH/TO/lhapdf-config - Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -554,17 +549,16 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Process has 6 diagrams -1 processes with 6 diagrams generated in 0.123 s +1 processes with 6 diagrams generated in 0.133 s Total: 1 processes with 6 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1 Load PLUGIN.CUDACPP_OUTPUT -Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.5.3_lo_vect. +Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT -DEBUG: cformat =  plugin [export_cpp.py at line 3070]  DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 165]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1 +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1 INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t1 t1~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t1 t1~ @1 @@ -573,32 +567,32 @@ INFO: Processing color information for process: g g > t1 t1~ @1 DEBUG: type(fortran_model)= [output.py at line 214]  DEBUG: type(me)= me=0 [output.py at line 215]  DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 216]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/. +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_t1t1x/. 
Generated helas calls for 1 subprocesses (6 diagrams) in 0.008 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates VSS1 routines ALOHA: aloha creates VVSS1 routines -ALOHA: aloha creates 3 routines in 0.191 s +ALOHA: aloha creates 3 routines in 0.188 s VVV1 VSS1 VSS1 VSS1 VVSS1 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/./HelAmps_MSSM_SLHA2.h -INFO: Created file HelAmps_MSSM_SLHA2.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/./HelAmps_MSSM_SLHA2.h +INFO: Created file HelAmps_MSSM_SLHA2.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/./Parameters_MSSM_SLHA2.cc INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_t1t1/src/. quit -real 0m1.379s -user 0m1.276s -sys 0m0.064s -Code generation completed in 2 seconds +real 0m1.361s +user 0m1.285s +sys 0m0.066s +Code generation completed in 1 seconds diff --git a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt index 186a6e657d..1bc4eab35c 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt +++ b/epochX/cudacpp/susy_gg_tt.mad/CODEGEN_mad_susy_gg_tt_log.txt @@ -14,7 +14,7 @@ Running MG5 in debug mode * * * * * * * * * * * * -* VERSION 3.5.3_lo_vect 2023-12-23 * +* VERSION 3.6.0_lo_vect 2024-06-17 * * * * WARNING: UNKNOWN DEVELOPMENT VERSION. * * WARNING: DO NOT USE FOR PRODUCTION * @@ -45,15 +45,10 @@ Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (inclu Note that you can still compile and run aMC@NLO with the built-in PDFs MG5_aMC> set lhapdf /PATH/TO/lhapdf-config -None does not seem to correspond to a valid lhapdf-config executable. -Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). -Note that you can still compile and run aMC@NLO with the built-in PDFs - MG5_aMC> set lhapdf /PATH/TO/lhapdf-config - Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". 
Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 @@ -554,24 +549,24 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.119 s +1 processes with 3 diagrams generated in 0.122 s Total: 1 processes with 3 diagrams output madevent_simd ../TMPOUT/CODEGEN_mad_susy_gg_tt --hel_recycling=False --vector_size=32 Load PLUGIN.CUDACPP_OUTPUT -Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.5.3_lo_vect. +Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT Addition matrix-element will be done with PLUGIN: CUDACPP_OUTPUT +DEBUG: opt['output_options']['vector_size'] =  32 [export_v4.py at line 4334]  Output will be done with PLUGIN: CUDACPP_OUTPUT -DEBUG: cformat =  standalone_simd [export_cpp.py at line 3070]  DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 165]  INFO: initialize a new directory: CODEGEN_mad_susy_gg_tt INFO: remove old information in CODEGEN_mad_susy_gg_tt DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards  -WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/SubProcesses  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt  +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards  +WARNING: File exists /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/SubProcesses  INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 @@ -581,18 +576,14 @@ INFO: Creating files in directory . FileWriter for ././CPPProcess.h FileWriter for ././CPPProcess.cc INFO: Created files CPPProcess.h and CPPProcess.cc in directory ./. 
-DEBUG: proc_id =  1 [export_cpp.py at line 710]  -DEBUG: config_map =  [1, 2, 3] [export_cpp.py at line 711]  -DEBUG: subproc_number =  0 [export_cpp.py at line 712]  -DEBUG: Done [export_cpp.py at line 713]  INFO: Generating Feynman diagrams for Process: g g > t t~ WEIGHTED<=2 @1 INFO: Finding symmetric diagrams for subprocess group gg_ttx -DEBUG: os.getcwd() =  /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/SubProcesses/P1_gg_ttx [export_v4.py at line 6494]  DEBUG: len(subproc_diagrams_for_config) =  3 [model_handling.py at line 1523]  DEBUG: iconfig_to_diag =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1547]  DEBUG: diag_to_iconfig =  {1: 1, 2: 2, 3: 3} [model_handling.py at line 1548]  Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s -Wrote files for 10 helas calls in 0.086 s +Wrote files for 10 helas calls in 0.076 s +DEBUG: self.vector_size =  32 [export_v4.py at line 7023]  ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines @@ -600,42 +591,45 @@ ALOHA: aloha creates 2 routines in 0.139 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 4 routines in 0.134 s +ALOHA: aloha creates 4 routines in 0.137 s VVV1 FFV1 FFV1 FFV1 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/./HelAmps_MSSM_SLHA2.h -INFO: Created file HelAmps_MSSM_SLHA2.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/./HelAmps_MSSM_SLHA2.h +INFO: Created file HelAmps_MSSM_SLHA2.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/./Parameters_MSSM_SLHA2.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/./Parameters_MSSM_SLHA2.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/./Parameters_MSSM_SLHA2.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/./Parameters_MSSM_SLHA2.cc INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/src/. The option zerowidth_tchannel is modified [True] but will not be written in the configuration files. 
If you want to make this value the default for future session, you can run 'save options --all' -save configuration file to /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards/me5_configuration.txt +save configuration file to /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards/me5_configuration.txt INFO: Use Fortran compiler gfortran INFO: Use c++ compiler g++ INFO: Generate jpeg diagrams INFO: Generate web pages -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common -patching file Source/genps.inc +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt; patch -p4 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.common patching file SubProcesses/makefile -DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 +DEBUG: cd /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/SubProcesses/P1_gg_ttx; patch -p6 -i /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/CODEGEN/PLUGIN/CUDACPP_SA_OUTPUT/MG5aMC_patches/PROD/patch.P1 patching file driver.f +Hunk #1 succeeded at 76 (offset 2 lines). +Hunk #2 succeeded at 280 (offset 8 lines). +Hunk #3 succeeded at 489 (offset 13 lines). patching file matrix1.f +Hunk #2 succeeded at 227 (offset 13 lines). DEBUG: p.returncode =  0 [output.py at line 258]  -Output to directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt done. +Output to directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt done. Type "launch" to generate events from this process, or see -/data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/README +/data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/README Run "open index.html" to see more information about this process. quit -real 0m3.067s -user 0m2.570s -sys 0m0.289s +real 0m2.945s +user 0m2.632s +sys 0m0.271s Code generation completed in 3 seconds ************************************************************ * * @@ -649,7 +643,7 @@ Code generation completed in 3 seconds * * * * * * * * * * * * -* VERSION 3.5.3_lo_vect * +* VERSION 3.6.0_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * @@ -657,9 +651,9 @@ Code generation completed in 3 seconds * Type 'help' for in-line help. 
* * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt @@ -679,7 +673,7 @@ launch in debug mode * * * * * * * * * * * * -* VERSION 3.5.3_lo_vect * +* VERSION 3.6.0_lo_vect * * * * The MadGraph5_aMC@NLO Development Team - Find us at * * https://server06.fynu.ucl.ac.be/projects/madgraph * @@ -687,9 +681,9 @@ launch in debug mode * Type 'help' for in-line help. * * * ************************************************************ -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards/me5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo/input/mg5_configuration.txt -INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards/me5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo/input/mg5_configuration.txt +INFO: load configuration from /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_mad_susy_gg_tt/Cards/me5_configuration.txt Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt diff --git a/epochX/cudacpp/susy_gg_tt.mad/Cards/me5_configuration.txt b/epochX/cudacpp/susy_gg_tt.mad/Cards/me5_configuration.txt index cdeedc7863..4f5079f78a 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Cards/me5_configuration.txt +++ b/epochX/cudacpp/susy_gg_tt.mad/Cards/me5_configuration.txt @@ -116,6 +116,7 @@ # cluster_type = condor # cluster_queue = madgraph # cluster_size = 150 +# cluster_walltime = # time in minute for slurm and second for condor (not supported for other scheduller) #! Path to a node directory to avoid direct writing on the central disk #! 
Note that condor clusters avoid direct writing by default (therefore this @@ -234,7 +235,7 @@ # pineappl = pineappl -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo # MG5 MAIN DIRECTORY -#mg5_path = /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/mg5amcnlo +#mg5_path = /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/mg5amcnlo diff --git a/epochX/cudacpp/susy_gg_tt.mad/Cards/param_card.dat b/epochX/cudacpp/susy_gg_tt.mad/Cards/param_card.dat index 16c221de5e..ffa472ed77 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Cards/param_card.dat +++ b/epochX/cudacpp/susy_gg_tt.mad/Cards/param_card.dat @@ -1,5 +1,5 @@ ###################################################################### -## PARAM_CARD AUTOMATICALY GENERATED BY MG5 FOLLOWING UFO MODEL #### +## PARAM_CARD AUTOMATICALLY GENERATED BY MG5 FOLLOWING UFO MODEL #### ###################################################################### ## ## ## Width set on Auto will be computed following the information ## diff --git a/epochX/cudacpp/susy_gg_tt.mad/Cards/param_card_default.dat b/epochX/cudacpp/susy_gg_tt.mad/Cards/param_card_default.dat index 16c221de5e..ffa472ed77 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Cards/param_card_default.dat +++ b/epochX/cudacpp/susy_gg_tt.mad/Cards/param_card_default.dat @@ -1,5 +1,5 @@ ###################################################################### -## PARAM_CARD AUTOMATICALY GENERATED BY MG5 FOLLOWING UFO MODEL #### +## PARAM_CARD AUTOMATICALLY GENERATED BY MG5 FOLLOWING UFO MODEL #### ###################################################################### ## ## ## Width set on Auto will be computed following the information ## diff --git a/epochX/cudacpp/susy_gg_tt.mad/Cards/proc_card_mg5.dat b/epochX/cudacpp/susy_gg_tt.mad/Cards/proc_card_mg5.dat index 01ea0ac955..80008b5115 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Cards/proc_card_mg5.dat +++ b/epochX/cudacpp/susy_gg_tt.mad/Cards/proc_card_mg5.dat @@ -8,7 +8,7 @@ #* * * * #* * #* * -#* VERSION 3.5.3_lo_vect 2023-12-23 * +#* VERSION 3.6.0_lo_vect 2024-06-17 * #* * #* WARNING: UNKNOWN DEVELOPMENT VERSION. * #* WARNING: DO NOT USE FOR PRODUCTION * diff --git a/epochX/cudacpp/susy_gg_tt.mad/Cards/reweight_card_default.dat b/epochX/cudacpp/susy_gg_tt.mad/Cards/reweight_card_default.dat index ace534ae02..5cc65fe095 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Cards/reweight_card_default.dat +++ b/epochX/cudacpp/susy_gg_tt.mad/Cards/reweight_card_default.dat @@ -19,8 +19,16 @@ change mode NLO # Define type of Reweighting. For LO sample this command # has no effect since only "LO" mode is allowed. + +#uncomment if you do not want to overwrite the reweight file of Sudakov in rw_me +#change rwgt_dir /PATH_MG5_BRANCH/NAME_FOLDER + +#uncomment if you want to use Sudakov Reweight +#change include_sudakov True + launch + # SPECIFY A PATH OR USE THE SET COMMAND LIKE THIS: # set sminputs 1 130 # modify 1/alpha_EW diff --git a/epochX/cudacpp/susy_gg_tt.mad/Cards/run_card.dat b/epochX/cudacpp/susy_gg_tt.mad/Cards/run_card.dat index b58554b4f2..6b82577032 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Cards/run_card.dat +++ b/epochX/cudacpp/susy_gg_tt.mad/Cards/run_card.dat @@ -157,6 +157,7 @@ systematics = systematics_program ! none, systematics [python], SysCalc [depreceted, C++] ['--mur=0.5,1,2', '--muf=0.5,1,2', '--pdf=errorset'] = systematics_arguments ! 
see: https://cp3.irmp.ucl.ac.be/projects/madgraph/wiki/Systematics#Systematicspythonmodule + #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ diff --git a/epochX/cudacpp/susy_gg_tt.mad/Cards/run_card_default.dat b/epochX/cudacpp/susy_gg_tt.mad/Cards/run_card_default.dat index 6c2c7854ca..99fa8681a6 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Cards/run_card_default.dat +++ b/epochX/cudacpp/susy_gg_tt.mad/Cards/run_card_default.dat @@ -157,6 +157,7 @@ systematics = systematics_program ! none, systematics [python], SysCalc [depreceted, C++] ['--mur=0.5,1,2', '--muf=0.5,1,2', '--pdf=errorset'] = systematics_arguments ! see: https://cp3.irmp.ucl.ac.be/projects/madgraph/wiki/Systematics#Systematicspythonmodule + #*********************************************************************** # SIMD/GPU configuration for the CUDACPP plugin #************************************************************************ diff --git a/epochX/cudacpp/susy_gg_tt.mad/MGMEVersion.txt b/epochX/cudacpp/susy_gg_tt.mad/MGMEVersion.txt index 9d3a5c0ba0..a806d3938c 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/MGMEVersion.txt +++ b/epochX/cudacpp/susy_gg_tt.mad/MGMEVersion.txt @@ -1 +1 @@ -3.5.3_lo_vect \ No newline at end of file +3.6.0_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/susy_gg_tt.mad/Source/DHELAS/aloha_functions.f b/epochX/cudacpp/susy_gg_tt.mad/Source/DHELAS/aloha_functions.f index 975725737f..47699fa614 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Source/DHELAS/aloha_functions.f +++ b/epochX/cudacpp/susy_gg_tt.mad/Source/DHELAS/aloha_functions.f @@ -351,7 +351,7 @@ subroutine oxxxxx(p,fmass,nhel,nsf , fo) fo(6) = ip * sqm(abs(im)) else - pp = min(p(0),dsqrt(p(1)**2+p(2)**2+p(3)**2)) +c pp = min(p(0),dsqrt(p(1)**2+p(2)**2+p(3)**2)) sf(1) = dble(1+nsf+(1-nsf)*nh)*rHalf sf(2) = dble(1+nsf-(1-nsf)*nh)*rHalf omega(1) = dsqrt(p(0)+pp) @@ -2054,7 +2054,7 @@ subroutine CombineAmp(nb, ihels, iwfcts, W1, Wall, Amp) enddo return end - + subroutine CombineAmpS(nb, ihels, iwfcts, W1, Wall, Amp) integer nb ! size of the vectors diff --git a/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/couplings.f b/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/couplings.f index f3b620ab58..04d6bb5333 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/couplings.f +++ b/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/couplings.f @@ -10,18 +10,27 @@ SUBROUTINE COUP() PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + LOGICAL UPDATELOOP COMMON /TO_UPDATELOOP/UPDATELOOP INCLUDE 'input.inc' - INCLUDE '../vector.inc' + + INCLUDE 'coupl.inc' READLHA = .TRUE. 
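C [editor's note] The restructuring below splits the coupling update in
C three stages: COUP1() keeps the constant couplings, COUP2() (which
C loses its VECID argument in this patch) is re-evaluated only when
C UPDATELOOP is set, and the per-event couplings GC_6/GC_51 move to the
C new COUP3(VECID), called point by point with the vector index.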
INCLUDE 'intparam_definition.inc' CALL COUP1() + IF (UPDATELOOP) THEN + + CALL COUP2() + + ENDIF + C couplings needed to be evaluated points by points C - CALL COUP2(1) + CALL COUP3(1) RETURN END @@ -47,7 +56,11 @@ SUBROUTINE UPDATE_AS_PARAM(VECID) INCLUDE '../maxparticles.inc' INCLUDE '../cuts.inc' + + INCLUDE '../vector.inc' + + INCLUDE '../run.inc' DOUBLE PRECISION ALPHAS @@ -65,7 +78,7 @@ SUBROUTINE UPDATE_AS_PARAM(VECID) couplings needed to be evaluated points by points C ALL_G(VECID) = G - CALL COUP2(VECID) + CALL COUP3(VECID) RETURN END @@ -80,7 +93,9 @@ SUBROUTINE UPDATE_AS_PARAM2(MU_R2,AS2 ,VECID) INTEGER VECID INCLUDE 'model_functions.inc' INCLUDE 'input.inc' + INCLUDE '../vector.inc' + INCLUDE 'coupl.inc' DOUBLE PRECISION MODEL_SCALE COMMON /MODEL_SCALE/MODEL_SCALE diff --git a/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/couplings1.f b/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/couplings1.f index e14f3a1770..72cfa0f6e4 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/couplings1.f +++ b/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/couplings1.f @@ -7,11 +7,12 @@ SUBROUTINE COUP1( ) IMPLICIT NONE INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' END diff --git a/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/couplings2.f b/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/couplings2.f index c75ecfcb60..30f3a04e3b 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/couplings2.f +++ b/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/couplings2.f @@ -2,18 +2,17 @@ c written by the UFO converter ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc - SUBROUTINE COUP2( VECID) + SUBROUTINE COUP2( ) IMPLICIT NONE - INTEGER VECID + INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' - GC_6(VECID) = -G - GC_51(VECID) = -(MDL_COMPLEXI*G*MDL_I51X11) END diff --git a/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/couplings3.f b/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/couplings3.f index 07e51607fb..4ec0057843 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/couplings3.f +++ b/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/couplings3.f @@ -7,12 +7,13 @@ SUBROUTINE COUP3( VECID) IMPLICIT NONE INTEGER VECID INCLUDE 'model_functions.inc' + INCLUDE '../vector.inc' + DOUBLE PRECISION PI, ZERO PARAMETER (PI=3.141592653589793D0) PARAMETER (ZERO=0D0) INCLUDE 'input.inc' - INCLUDE '../vector.inc' INCLUDE 'coupl.inc' GC_6(VECID) = -G GC_51(VECID) = -(MDL_COMPLEXI*G*MDL_I51X11) diff --git a/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/makefile b/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/makefile index 0b24445a53..5a5681d220 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/makefile +++ b/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/makefile @@ -3,7 +3,7 @@ # Makefile for model library # # ---------------------------------------------------------------------------- - +# template models/template_files/makefile_madevent # Check for ../make_opts ifeq ($(wildcard ../make_opts), ../make_opts) include ../make_opts diff --git a/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/makeinc.inc b/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/makeinc.inc index 6e2743eac1..4a9e1b2cc5 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/makeinc.inc +++ 
b/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/makeinc.inc @@ -2,4 +2,4 @@ # written by the UFO converter ############################################################################# -MODEL = couplings.o lha_read.o printout.o rw_para.o model_functions.o couplings1.o couplings2.o \ No newline at end of file +MODEL = couplings.o lha_read.o printout.o rw_para.o model_functions.o couplings1.o couplings2.o couplings3.o \ No newline at end of file diff --git a/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/printout.f b/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/printout.f index 18b8f35b08..3e195b03a2 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/printout.f +++ b/epochX/cudacpp/susy_gg_tt.mad/Source/MODEL/printout.f @@ -1,3 +1,7 @@ +ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc +c written by the UFO converter +ccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc + c************************************************************************ c** ** c** MadGraph/MadEvent Interface to FeynRules ** @@ -9,7 +13,7 @@ subroutine printout implicit none - include '../vector.inc' ! defines VECSIZE_MEMMAX + include '../vector.inc' ! VECSIZE_MEMMAX (needed by coupl.inc) include 'coupl.inc' ! needs VECSIZE_MEMMAX (defined in vector.inc) include 'input.inc' diff --git a/epochX/cudacpp/susy_gg_tt.mad/Source/PDF/eepdf.inc b/epochX/cudacpp/susy_gg_tt.mad/Source/PDF/eepdf.inc index a0183e49ee..d50d8c62b3 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Source/PDF/eepdf.inc +++ b/epochX/cudacpp/susy_gg_tt.mad/Source/PDF/eepdf.inc @@ -4,6 +4,9 @@ integer n_ee parameter (n_ee = 4) ! arrays to store the components before combining them + ! note this common is very quickly overwritten do not use + ! use such common outside of auto_dsig.f double precision ee_components(n_ee) common / to_ee_components / ee_components + diff --git a/epochX/cudacpp/susy_gg_tt.mad/Source/PDF/pdfwrap_emela.f b/epochX/cudacpp/susy_gg_tt.mad/Source/PDF/pdfwrap_emela.f index bce10819d5..da1e4e2276 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Source/PDF/pdfwrap_emela.f +++ b/epochX/cudacpp/susy_gg_tt.mad/Source/PDF/pdfwrap_emela.f @@ -13,7 +13,7 @@ SUBROUTINE PDFWRAP DOUBLE PRECISION VALUE(20) REAL*8 ALPHASPDF EXTERNAL ALPHASPDF - ! PDFs with beamstrahlung use specific initialisation/evaluation +C PDFs with beamstrahlung use specific initialisation/evaluation LOGICAL HAS_BSTRAHL COMMON /TO_HAS_BS/ HAS_BSTRAHL diff --git a/epochX/cudacpp/susy_gg_tt.mad/Source/PDF/pdg2pdf.f b/epochX/cudacpp/susy_gg_tt.mad/Source/PDF/pdg2pdf.f index 46f321e66b..2e7343a69b 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Source/PDF/pdg2pdf.f +++ b/epochX/cudacpp/susy_gg_tt.mad/Source/PDF/pdg2pdf.f @@ -108,7 +108,7 @@ double precision function pdg2pdf(ih,ipdg,beamid,x,xmu) else if (abs(ipart).eq.10) then ipart = sign(1,ipart) * 15 endif - pdg2pdf = 0d0 + pdg2pdf = 0d0 if (beamid.lt.0) then ih_local = ipart @@ -122,7 +122,7 @@ double precision function pdg2pdf(ih,ipdg,beamid,x,xmu) endif do i_ee = 1, n_ee ee_components(i_ee) = compute_eepdf(x,omx_ee(iabs(beamid)),xmu,i_ee,ipart,ih_local) - enddo + enddo pdg2pdf = ee_components(1) ! 
temporary to test pdf load c write(*,*), x, beamid ,omx_ee(iabs(beamid)),xmu,1,ipart,ih_local,pdg2pdf return diff --git a/epochX/cudacpp/susy_gg_tt.mad/Source/dsample.f b/epochX/cudacpp/susy_gg_tt.mad/Source/dsample.f index 09d482b45a..b0bc0547ad 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Source/dsample.f +++ b/epochX/cudacpp/susy_gg_tt.mad/Source/dsample.f @@ -204,6 +204,9 @@ subroutine sample_full(ndim,ncall,itmax,itmin,dsig,ninvar,nconfigs,VECSIZE_USED) if (VECSIZE_USED.le.1) then all_fx(1) = dsig(all_p, all_wgt,0) + ivec=0 + ilock=0 + iwarp=1 else c Here "i" is the position in the full grid of the event do i=(iwarp-1)*WARP_SIZE+1, iwarp*warp_size diff --git a/epochX/cudacpp/susy_gg_tt.mad/Source/eepdf.inc b/epochX/cudacpp/susy_gg_tt.mad/Source/eepdf.inc index a0183e49ee..d50d8c62b3 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Source/eepdf.inc +++ b/epochX/cudacpp/susy_gg_tt.mad/Source/eepdf.inc @@ -4,6 +4,9 @@ integer n_ee parameter (n_ee = 4) ! arrays to store the components before combining them + ! note this common is very quickly overwritten do not use + ! use such common outside of auto_dsig.f double precision ee_components(n_ee) common / to_ee_components / ee_components + diff --git a/epochX/cudacpp/susy_gg_tt.mad/Source/genps.inc b/epochX/cudacpp/susy_gg_tt.mad/Source/genps.inc index af7e0efbce..ef78e7e812 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Source/genps.inc +++ b/epochX/cudacpp/susy_gg_tt.mad/Source/genps.inc @@ -29,9 +29,8 @@ c parameter (max_host=99,maxplace=199,maxpoints=100,maxans=50) c************************************************************************* c Parameters for helicity sums in matrixN.f c************************************************************************* - REAL*8 LIMHEL -c PARAMETER(LIMHEL=1e-8) ! ME threshold for helicity filtering (Fortran default) - PARAMETER(LIMHEL=0) ! ME threshold for helicity filtering (force Fortran to mimic cudacpp, see #419) +c REAL*8 LIMHEL +c PARAMETER(LIMHEL=1e-8) -> pass in the run_card.dat INTEGER MAXTRIES PARAMETER(MAXTRIES=25) C To pass the helicity configuration chosen by the DiscreteSampler to diff --git a/epochX/cudacpp/susy_gg_tt.mad/Source/run.inc b/epochX/cudacpp/susy_gg_tt.mad/Source/run.inc index 5433a23583..81c8df6bf2 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Source/run.inc +++ b/epochX/cudacpp/susy_gg_tt.mad/Source/run.inc @@ -106,4 +106,7 @@ c double precision tmin_for_channel integer sde_strat ! 
1 means standard single diagram enhancement strategy, c 2 means approximation by the denominator of the propagator - common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat \ No newline at end of file + common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat +c + double precision limhel + common/to_limhel/limhel diff --git a/epochX/cudacpp/susy_gg_tt.mad/Source/run_card.inc b/epochX/cudacpp/susy_gg_tt.mad/Source/run_card.inc index 67af0f2051..1a1bc782bd 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Source/run_card.inc +++ b/epochX/cudacpp/susy_gg_tt.mad/Source/run_card.inc @@ -88,6 +88,8 @@ DSQRT_SHAT = 0.000000000000000D+00 + LIMHEL = 0.000000000000000D+00 + PTJ = 2.000000000000000D+01 PTB = 0.000000000000000D+00 diff --git a/epochX/cudacpp/susy_gg_tt.mad/Source/setrun.f b/epochX/cudacpp/susy_gg_tt.mad/Source/setrun.f index 9e9ef7fdbd..dc6caef748 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/Source/setrun.f +++ b/epochX/cudacpp/susy_gg_tt.mad/Source/setrun.f @@ -162,9 +162,21 @@ subroutine setrun C Fill common block for Les Houches init info do i=1,2 if(lpp(i).eq.1.or.lpp(i).eq.2) then - idbmup(i)=2212 + if (nb_proton(i).eq.1.and.nb_neutron(i).eq.0) then + idbmup(i)=2212 + elseif (nb_proton(i).eq.0.and.nb_neutron(i).eq.1) then + idbmup(i)=2112 + else + idbmup(i) = 1000000000 + (nb_proton(i)+nb_neutron(i))*10 + $ + nb_proton(i)*10000 + endif elseif(lpp(i).eq.-1.or.lpp(i).eq.-2) then - idbmup(i)=-2212 + if (nb_proton(i).eq.1.and.nb_neutron(i).eq.0) then + idbmup(i)=-2212 + else + idbmup(i) = -1*(1000000000 + (nb_proton(i)+nb_neutron(i))*10 + $ + nb_proton(i)*10000) + endif elseif(lpp(i).eq.3) then idbmup(i)=11 elseif(lpp(i).eq.-3) then diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/MGVersion.txt b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/MGVersion.txt index 9d3a5c0ba0..a806d3938c 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/MGVersion.txt +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/MGVersion.txt @@ -1 +1 @@ -3.5.3_lo_vect \ No newline at end of file +3.6.0_lo_vect \ No newline at end of file diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc index f2dfb25eb9..874cb91a6c 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.cc @@ -7,7 +7,7 @@ // Further modified by: S. Hageboeck, O. Mattelaer, S. Roiser, J. Teig, A. Valassi, Z. Wettersten (2020-2024) for the MG5aMC CUDACPP plugin. //========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.h b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.h index ed162820fd..355b4d3e77 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.h +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/CPPProcess.h @@ -7,7 +7,7 @@ // Further modified by: O. Mattelaer, S. Roiser, J. Teig, A. Valassi (2020-2024) for the MG5aMC CUDACPP plugin. 
//========================================================================== // This file has been automatically generated for CUDA/C++ standalone by -// MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +// MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 // By the MadGraph5_aMC@NLO Development Team // Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch //========================================================================== diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f index ef6cba5d03..ff10374363 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig.f @@ -376,7 +376,7 @@ SUBROUTINE DSIG_VEC(ALL_P,ALL_WGT,ALL_XBK,ALL_Q2FACT,ALL_CM_RAP DOUBLE PRECISION FUNCTION DSIG(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -1134,11 +1134,12 @@ END SUBROUTINE MAP_1_TO_3 SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=16) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL WRITE(STREAM_ID,*) GOODHEL RETURN @@ -1148,32 +1149,29 @@ SUBROUTINE WRITE_GOOD_HEL(STREAM_ID) SUBROUTINE READ_GOOD_HEL(STREAM_ID) IMPLICIT NONE INCLUDE 'genps.inc' + INCLUDE 'maxamps.inc' INTEGER STREAM_ID INTEGER NCOMB PARAMETER ( NCOMB=16) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL READ(STREAM_ID,*) GOODHEL - NTRY(1) = MAXTRIES + 1 - NTRY(2) = MAXTRIES + 1 + NTRY(:) = MAXTRIES + 1 RETURN END SUBROUTINE INIT_GOOD_HEL() IMPLICIT NONE + INCLUDE 'maxamps.inc' INTEGER NCOMB PARAMETER ( NCOMB=16) - LOGICAL GOODHEL(NCOMB, 2) - INTEGER NTRY(2) - INTEGER I + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) + INTEGER I,J - DO I=1,NCOMB - GOODHEL(I,1) = .FALSE. - GOODHEL(I,2) = .FALSE. - ENDDO - NTRY(1) = 0 - NTRY(2) = 0 + GOODHEL(:,:) = .FALSE. + NTRY(:) = 0 END INTEGER FUNCTION GET_MAXSPROC() diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f index e00e2f9e40..51cde595e2 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/auto_dsig1.f @@ -1,7 +1,7 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -101,6 +101,8 @@ DOUBLE PRECISION FUNCTION DSIG1(PP,WGT,IMODE) C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 DSIG1=0D0 IF(IMODE.EQ.1)THEN @@ -217,7 +219,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, $ ICONF_VEC, IMIRROR_VEC, VECSIZE_USED) C **************************************************** C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 
3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -281,7 +283,7 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION RHEL ! random number INTEGER CHANNEL C -C STUFF FOR DRESSED EE COLLISIONS --even if not supported for now-- +C STUFF FOR DRESSED EE COLLISIONS C INCLUDE '../../Source/PDF/eepdf.inc' DOUBLE PRECISION EE_COMP_PROD @@ -320,9 +322,10 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, DOUBLE PRECISION ALL_RWGT(VECSIZE_MEMMAX) C Common blocks - CHARACTER*7 PDLABEL,EPA_LABEL - INTEGER LHAID - COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL + INCLUDE '../../Source/PDF/pdf.inc' +C CHARACTER*7 PDLABEL,EPA_LABEL +C INTEGER LHAID +C COMMON/TO_PDF/LHAID,PDLABEL,EPA_LABEL C C local @@ -338,6 +341,8 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, C ---------- C BEGIN CODE C ---------- + SELECTED_HEL(:) = 0 + SELECTED_COL(:) = 0 IF(IMODE.EQ.1)THEN NFACT = DSIG1(ALL_PP(0,1,1), ALL_WGT(1), IMODE) @@ -485,11 +490,6 @@ DOUBLE PRECISION FUNCTION DSIG1_VEC(ALL_PP, ALL_XBK, ALL_Q2FACT, ENDDO END -C -C Functionality to handling grid -C - - @@ -589,9 +589,11 @@ SUBROUTINE SMATRIX1_MULTI(P_MULTI, HEL_RAND, COL_RAND, CHANNELS, C ! This is a workaround for C https://github.com/oliviermattelaer/mg5amc_test/issues/22 C (see PR #486) - IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is not called at all) - CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the initialization' within SMATRIX1 - ENDIF +C IF( FBRIDGE_MODE .EQ. 1 ) THEN ! (CppOnly=1 : SMATRIX1 is +C not called at all) +C CALL RESET_CUMULATIVE_VARIABLE() ! mimic 'avoid bias of the' +C //' initialization' within SMATRIX1 +C ENDIF CALL FBRIDGEGETNGOODHEL(FBRIDGE_PBRIDGE,NGOODHEL,NTOTHEL) IF( NTOTHEL .NE. NCOMB ) THEN WRITE(6,*) 'ERROR ! Cudacpp/Fortran mismatch', @@ -704,3 +706,6 @@ INTEGER FUNCTION GET_NHEL1(HEL, IPART) END + + + diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/driver.f b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/driver.f index 27a6e46742..ec5722702a 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/driver.f +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/driver.f @@ -43,6 +43,8 @@ Program DRIVER DOUBLE PRECISION CUMULATED_TIMING COMMON/GENERAL_STATS/CUMULATED_TIMING + logical init_mode + common /to_determine_zero_hel/init_mode c c PARAM_CARD c @@ -226,6 +228,12 @@ Program DRIVER call init_good_hel() call get_user_params(ncall,itmax,itmin,mincfig) maxcfig=mincfig + if (init_mode) then + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. + ickkw = 0 + endif minvar(1,1) = 0 !This tells it to map things invarients write(*,*) 'Attempting mappinvarients',nconfigs,nexternal if (mincfig.lt.0)then @@ -338,9 +346,11 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) common /to_accuracy/accur integer use_cut common /to_weight/use_cut + logical init_mode common /to_determine_zero_hel/init_mode - + include 'vector.inc' + include 'run.inc' integer lbw(0:nexternal) !Use of B.W. common /to_BW/ lbw @@ -388,6 +398,9 @@ subroutine get_user_params(ncall,itmax,itmin,iconfig) isum_hel = 0 multi_channel = .false. init_mode = .true. + fixed_ren_scale = .true. + fixed_fac_scale1 = .true. + fixed_fac_scale2 = .true. 
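C [editor's note] As in the DRIVER hunk above, the helicity-determination
C pass (init_mode) freezes the renormalisation and factorisation scales
C (and, there, also forces ickkw=0), presumably so that the zero-helicity
C filtering does not depend on event-by-event scale choices.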
write(*,*) 'Determining zero helicities' else isum_hel= i diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f index 936c7c2736..cd17ce6fcf 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/matrix1.f @@ -1,7 +1,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, $ ICOL) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C @@ -23,6 +23,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INCLUDE 'maxconfigs.inc' INCLUDE 'nexternal.inc' INCLUDE 'maxamps.inc' + INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX + INCLUDE 'run.inc' INTEGER NCOMB PARAMETER ( NCOMB=16) INTEGER NGRAPHS @@ -46,8 +48,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C C global (due to reading writting) C - LOGICAL GOODHEL(NCOMB,2) - INTEGER NTRY(2) + LOGICAL GOODHEL(NCOMB, MAXSPROC) + INTEGER NTRY(MAXSPROC) COMMON/BLOCK_GOODHEL/NTRY,GOODHEL C @@ -56,26 +58,23 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER CONFSUB(MAXSPROC,LMAXCONFIGS) INCLUDE 'config_subproc_map.inc' INTEGER NHEL(NEXTERNAL,NCOMB) - INTEGER ISHEL(2) + INTEGER ISHEL REAL*8 T,MATRIX1 REAL*8 R,SUMHEL,TS(NCOMB) INTEGER I,IDEN INTEGER JC(NEXTERNAL),II REAL*8 HWGT, XTOT, XTRY, XREJ, XR, YFRAC(0:NCOMB) - INTEGER NGOOD(2), IGOOD(NCOMB,2) - INTEGER JHEL(2), J, JJ - INTEGER THIS_NTRY(2) - SAVE THIS_NTRY + INTEGER NGOOD + INTEGER J, JJ INTEGER NB_FAIL SAVE NB_FAIL - DATA THIS_NTRY /0,0/ DATA NB_FAIL /0/ DOUBLE PRECISION GET_CHANNEL_CUT EXTERNAL GET_CHANNEL_CUT C - INTEGER NGOODHEL(2) ! -1 if not yet retrieved and printed + INTEGER NGOODHEL ! -1 if not yet retrieved and printed SAVE NGOODHEL - DATA NGOODHEL/-1,-1/ + DATA NGOODHEL/-1/ C C This is just to temporarily store the reference grid for C helicity of the DiscreteSampler so as to obtain its number of @@ -86,7 +85,6 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C LOGICAL INIT_MODE COMMON /TO_DETERMINE_ZERO_HEL/INIT_MODE - INCLUDE '../../Source/vector.inc' ! defines VECSIZE_MEMMAX DOUBLE PRECISION AMP2(MAXAMPS), JAMP2(0:MAXFLOW) @@ -96,26 +94,25 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, INTEGER IMIRROR, IPROC COMMON/TO_MIRROR/ IMIRROR,IPROC - DOUBLE PRECISION TMIN_FOR_CHANNEL - INTEGER SDE_STRAT ! 1 means standard single diagram enhancement strategy, +C included vi run.inc +C double precision tmin_for_channel +C integer sde_strat ! 
1 means standard single diagram enhancement +C strategy, C 2 means approximation by the denominator of the propagator - COMMON/TO_CHANNEL_STRAT/TMIN_FOR_CHANNEL, SDE_STRAT +C common/TO_CHANNEL_STRAT/tmin_for_channel, sde_strat REAL*8 POL(2) COMMON/TO_POLARIZATION/ POL - DOUBLE PRECISION SMALL_WIDTH_TREATMENT - COMMON/NARROW_WIDTH/SMALL_WIDTH_TREATMENT - INTEGER ISUM_HEL LOGICAL MULTI_CHANNEL COMMON/TO_MATRIX/ISUM_HEL, MULTI_CHANNEL INTEGER MAPCONFIG(0:LMAXCONFIGS), ICONFIG COMMON/TO_MCONFIGS/MAPCONFIG, ICONFIG DATA XTRY, XREJ /0,0/ - DATA NGOOD /0,0/ - DATA ISHEL/0,0/ - SAVE YFRAC, IGOOD, JHEL + DATA NGOOD /0/ + DATA ISHEL/0/ + SAVE YFRAC DATA (NHEL(I, 1),I=1,4) /-1,-1,-1, 1/ DATA (NHEL(I, 2),I=1,4) /-1,-1,-1,-1/ DATA (NHEL(I, 3),I=1,4) /-1,-1, 1, 1/ @@ -143,8 +140,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C BEGIN CODE C ---------- - NTRY(IMIRROR)=NTRY(IMIRROR)+1 - THIS_NTRY(IMIRROR) = THIS_NTRY(IMIRROR)+1 + NTRY(1)=NTRY(1)+1 DO I=1,NEXTERNAL JC(I) = +1 ENDDO @@ -153,11 +149,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, DO I=1,NDIAGS AMP2(I)=0D0 ENDDO - JAMP2(0)=2 - DO I=1,INT(JAMP2(0)) - JAMP2(I)=0D0 - ENDDO ENDIF + JAMP2(0)=2 + DO I=1,INT(JAMP2(0)) + JAMP2(I)=0D0 + ENDDO ANS = 0D0 DO I=1,NCOMB TS(I)=0D0 @@ -167,12 +163,11 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, C initialized. C If HEL_PICKED==-1, this means that calls to other matrix C where in initialization mode as well for the helicity. - IF ((ISHEL(IMIRROR).EQ.0.AND.ISUM_HEL.EQ.0) + IF ((ISHEL.EQ.0.AND.ISUM_HEL.EQ.0) $ .OR.(DS_GET_DIM_STATUS('Helicity').EQ.0).OR.(HEL_PICKED.EQ.-1)) $ THEN DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR) .OR. NTRY(IMIRROR) - $ .LE.MAXTRIES.OR.(ISUM_HEL.NE.0).OR.THIS_NTRY(IMIRROR).LE.10) + IF (GOODHEL(I,1) .OR. NTRY(1).LE.MAXTRIES.OR.(ISUM_HEL.NE.0)) $ THEN T=MATRIX1(P ,NHEL(1,I),JC(1),I,AMP2, JAMP2, IVEC) @@ -184,7 +179,8 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, TS(I)=T ENDIF ENDDO - IF(NTRY(IMIRROR).EQ.(MAXTRIES+1)) THEN + IF(NTRY(1).EQ.(MAXTRIES+1).AND.DS_GET_DIM_STATUS('Helicity') + $ .NE.-1) THEN CALL RESET_CUMULATIVE_VARIABLE() ! avoid biais of the initialization ENDIF IF (ISUM_HEL.NE.0) THEN @@ -213,35 +209,33 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, CALL DS_SET_GRID_MODE('Helicity','init') ENDIF ELSE - JHEL(IMIRROR) = 1 - IF(NTRY(IMIRROR).LE.MAXTRIES.OR.THIS_NTRY(IMIRROR).LE.10)THEN + IF(NTRY(1).LE.MAXTRIES)THEN DO I=1,NCOMB IF(INIT_MODE) THEN IF (DABS(TS(I)).GT.ANS*LIMHEL/NCOMB) THEN PRINT *, 'Matrix Element/Good Helicity: 1 ', I, $ 'IMIRROR', IMIRROR ENDIF - ELSE IF (.NOT.GOODHEL(I,IMIRROR) .AND. (DABS(TS(I)) - $ .GT.ANS*LIMHEL/NCOMB)) THEN - GOODHEL(I,IMIRROR)=.TRUE. - NGOOD(IMIRROR) = NGOOD(IMIRROR) +1 - IGOOD(NGOOD(IMIRROR),IMIRROR) = I - PRINT *,'Added good helicity ',I,TS(I)*NCOMB/ANS,' in' - $ //' event ',NTRY(IMIRROR), 'local:',THIS_NTRY(IMIRROR) + ELSE IF (.NOT.GOODHEL(I,1) .AND. (DABS(TS(I)).GT.ANS + $ *LIMHEL/NCOMB)) THEN + GOODHEL(I,1)=.TRUE. 
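C [editor's note] Throughout this matrix1.f hunk the good-helicity
C bookkeeping switches from per-mirror arrays (index IMIRROR, size 2) to
C per-subprocess arrays of size MAXSPROC from maxamps.inc, and the
C THIS_NTRY / IGOOD / JHEL helpers are dropped.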
+ NGOOD = NGOOD +1 + PRINT *,'Added good helicity ',I, 'for process 1',TS(I) + $ *NCOMB/ANS,' in event ',NTRY(1) ENDIF ENDDO ENDIF - IF(NTRY(IMIRROR).EQ.MAXTRIES)THEN - ISHEL(IMIRROR)=MIN(ISUM_HEL,NGOOD(IMIRROR)) + IF(NTRY(1).EQ.MAXTRIES)THEN + ISHEL=MIN(ISUM_HEL,NGOOD) C Print the number of good helicities - IF (NGOODHEL(IMIRROR).EQ.-1) THEN - NGOODHEL(IMIRROR)=0 + IF (NGOODHEL.EQ.-1) THEN + NGOODHEL=0 DO I=1,NCOMB - IF (GOODHEL(I,IMIRROR)) THEN - NGOODHEL(IMIRROR)=NGOODHEL(IMIRROR)+1 + IF (GOODHEL(I,1)) THEN + NGOODHEL=NGOODHEL+1 ENDIF END DO - WRITE (6,*) 'NGOODHEL =', NGOODHEL(IMIRROR) ! no need to print imirror? + WRITE (6,*) 'NGOODHEL =', NGOODHEL WRITE (6,*) 'NCOMB =', NCOMB ENDIF ENDIF @@ -313,7 +307,7 @@ SUBROUTINE SMATRIX1(P, RHEL, RCOL, CHANNEL, IVEC, ANS, IHEL, REAL*8 FUNCTION MATRIX1(P,NHEL,IC, IHEL,AMP2, JAMP2, IVEC) C -C Generated by MadGraph5_aMC@NLO v. 3.5.3_lo_vect, 2023-12-23 +C Generated by MadGraph5_aMC@NLO v. 3.6.0_lo_vect, 2024-06-17 C By the MadGraph5_aMC@NLO Development Team C Visit launchpad.net/madgraph5 and amcatnlo.web.cern.ch C diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cluster.f b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cluster.f index 649e46f4e9..b8995283ed 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cluster.f +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/cluster.f @@ -552,6 +552,8 @@ logical function cluster(p, ivec) if (btest(mlevel,1)) $ write (*,*)'New event' + iwin = 0 + jwin = 0 cluster=.false. clustered=.false. do i=0,3 @@ -663,7 +665,8 @@ logical function cluster(p, ivec) c initialize graph storage igraphs(0)=0 nleft=nexternal -c cluster +c cluster + if (iwin.eq.0.or.jwin.eq.0) stop 21 do n=1,nexternal-2 c combine winner imocl(n)=imap(iwin,2)+imap(jwin,2) diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/proc_characteristics b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/proc_characteristics index 9bd0635a36..0a47a3fabf 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/proc_characteristics +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/proc_characteristics @@ -17,6 +17,8 @@ splitting_types = [] perturbation_order = [] limitations = [] + ew_sudakov = False hel_recycling = False single_color = True nlo_mixed_expansion = True + gauge = unitary diff --git a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/refine.sh b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/refine.sh index afb9b99ad1..b46170ba23 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/refine.sh +++ b/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/refine.sh @@ -57,7 +57,11 @@ j=%(directory)s for((try=1;try<=16;try+=1)); do if [ "$keeplog" = true ] ; then + if [[ -e ../madevent ]];then ../madevent 2>&1 >> $k &1 >> $k &1 >> log.txt &1 >> log.txt &1 >> $k \w*)>') - pat_end=re.compile('\w*)>') + pat_begin=re.compile(r'<(?P\w*)>') + pat_end=re.compile(r'\w*)>') tag_to_file={'slha':'param_card.dat', 'mgruncard':'run_card.dat', @@ -319,7 +319,7 @@ def check_pid(self, pid2label): def get_lha_strategy(self): """get the lha_strategy: how the weight have to be handle by the shower""" - if not self["init"]: + if "init" not in self or not self["init"]: raise Exception("No init block define") data = self["init"].split('\n')[0].split() @@ -537,7 +537,8 @@ def charge_card(self, tag): self.param_card = param_card_reader.ParamCard(param_card) return self.param_card elif tag == 'mgruncard': - self.run_card = RunCard(self[tag], unknown_warning=False) + with misc.TMP_variable(RunCard, 'allow_scan', True): + self.run_card = RunCard(self[tag], consistency=False, 
unknow_warning=False) return self.run_card elif tag == 'mg5proccard': proc_card = self[tag].split('\n') @@ -976,6 +977,8 @@ class ConfigFile(dict): """ a class for storing/dealing with input file. """ + allow_scan = False + def __init__(self, finput=None, **opt): """initialize a new instance. input can be an instance of MadLoopParam, a file, a path to a file, or simply Nothing""" @@ -993,6 +996,7 @@ def __init__(self, finput=None, **opt): # Initialize it with all the default value self.user_set = set() self.auto_set = set() + self.scan_set = {} #key -> type of list (int/float/bool/str/... for scan self.system_only = set() self.lower_to_case = {} self.list_parameter = {} #key -> type of list (int/float/bool/str/... @@ -1109,6 +1113,8 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): #1. check if the parameter is set to auto -> pass it to special if lower_name in self: targettype = type(dict.__getitem__(self, lower_name)) + if lower_name in self.scan_set: + targettype = self.scan_set[lower_name] if targettype != str and isinstance(value, str) and value.lower() == 'auto': self.auto_set.add(lower_name) if lower_name in self.user_set: @@ -1118,13 +1124,26 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): return elif lower_name in self.auto_set: self.auto_set.remove(lower_name) - + + + #1. check if the parameter is set to auto -> pass it to special + scan_targettype = None + if self.allow_scan and isinstance(value, str) and value.strip().startswith('scan'): + if lower_name in self.user_set: + self.user_set.remove(lower_name) + self.scan_set[lower_name] = type(self[lower_name]) + dict.__setitem__(self, lower_name, value) + #keep old value. + self.post_set(lower_name,value, change_userdefine, raiseerror) + return + elif lower_name in self.scan_set: + scan_targettype = self.scan_set[lower_name] + del self.scan_set[lower_name] + # 2. Find the type of the attribute that we want if lower_name in self.list_parameter: targettype = self.list_parameter[lower_name] - - if isinstance(value, str): # split for each comma/space value = value.strip() @@ -1248,8 +1267,11 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): if change_userdefine: self.user_set.add(lower_name) return self.post_set(lower_name, None, change_userdefine, raiseerror) - elif name in self: - targettype = type(self[name]) + elif name in self: + if scan_targettype: + targettype = targettype + else: + targettype = type(self[name]) else: logger.debug('Trying to add argument %s in %s. ' % (name, self.__class__.__name__) +\ 'This argument is not defined by default. 
Please consider adding it.') @@ -1262,7 +1284,7 @@ def __setitem__(self, name, value, change_userdefine=False,raiseerror=False): if change_userdefine: self.user_set.add(lower_name) return self.post_set(lower_name, None, change_userdefine, raiseerror) - + value = self.format_variable(value, targettype, name=name) #check that the value is allowed: if lower_name in self.allowed_value and '*' not in self.allowed_value[lower_name]: @@ -1444,7 +1466,7 @@ def format_variable(value, targettype, name="unknown"): value =int(value[:-1]) * convert[value[-1]] elif '/' in value or '*' in value: try: - split = re.split('(\*|/)',value) + split = re.split(r'(\*|/)',value) v = float(split[0]) for i in range((len(split)//2)): if split[2*i+1] == '*': @@ -1482,7 +1504,7 @@ def format_variable(value, targettype, name="unknown"): value = float(value) except ValueError: try: - split = re.split('(\*|/)',value) + split = re.split(r'(\*|/)',value) v = float(split[0]) for i in range((len(split)//2)): if split[2*i+1] == '*': @@ -1491,7 +1513,10 @@ def format_variable(value, targettype, name="unknown"): v /= float(split[2*i+2]) except: v=0 - raise InvalidCmd("%s can not be mapped to a float" % value) + if "scan" in value: + raise InvalidCmd("%s is not supported here. Note that scan command can not be present simultaneously in the run_card and param_card." % value) + else: + raise InvalidCmd("%s can not be mapped to a float" % value) finally: value = v else: @@ -1737,10 +1762,12 @@ def default_setup(self): self.add_param('splitting_types',[], typelist=str) self.add_param('perturbation_order', [], typelist=str) self.add_param('limitations', [], typelist=str) + self.add_param('ew_sudakov', False) self.add_param('hel_recycling', False) self.add_param('single_color', True) self.add_param('nlo_mixed_expansion', True) - + self.add_param('gauge', 'U') + def read(self, finput): """Read the input file, this can be a path to a file, a file object, a str with the content of the file.""" @@ -3104,7 +3131,7 @@ def write(self, output_file, template=None, python_template=False, # do not write hidden parameter not hidden for this template # if python_template: - written = written.union(set(re.findall('\%\((\w*)\)s', open(template,'r').read(), re.M))) + written = written.union(set(re.findall(r'\%\((\w*)\)s', open(template,'r').read(), re.M))) to_write = to_write.union(set(self.hidden_param)) to_write = to_write.difference(written) @@ -3157,7 +3184,6 @@ def get_value_from_include(self, path, list_of_params, output_dir): if path does not exists return the current value in self for all parameter""" #WARNING DOES NOT HANDLE LIST/DICT so far - misc.sprint(output_dir, path) # handle case where file is missing if not os.path.exists(pjoin(output_dir,path)): misc.sprint("include file not existing", pjoin(output_dir,path)) @@ -3259,7 +3285,7 @@ def edit_dummy_fct_from_file(self, filelist, outdir): text = open(path,'r').read() #misc.sprint(text) f77_type = ['real*8', 'integer', 'double precision', 'logical'] - pattern = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ + pattern = re.compile(r'^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ % {'type':'|'.join(f77_type)}, re.I+re.M) for fct in pattern.findall(text): fsock = file_writers.FortranWriter(tmp,'w') @@ -3287,6 +3313,7 @@ def edit_dummy_fct_from_file(self, filelist, outdir): starttext = open(pjoin(outdir, path+'.orig')).read() fsock.remove_routine(starttext, to_mod[path][0]) for text in to_mod[path][1]: + text = self.retro_compatible_custom_fct(text) 
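# [editor's note] retro_compatible_custom_fct (defined just below) scans a
# user-supplied custom routine for include statements and, whenever it meets
# "include 'run.inc'" before any "include 'vector.inc'" within the same
# routine, injects the vector.inc include first -- keeping pre-existing
# dummy_fct overrides compatible with the vectorised run.inc layout.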
fsock.writelines(text) fsock.close() if not filecmp.cmp(pjoin(outdir, path), pjoin(outdir, path+'.tmp')): @@ -3303,7 +3330,33 @@ def edit_dummy_fct_from_file(self, filelist, outdir): files.mv(pjoin(outdir,path+'.orig'), pjoin(outdir, path)) + @staticmethod + def retro_compatible_custom_fct(lines, mode=None): + f77_type = ['real*8', 'integer', 'double precision', 'logical'] + function_pat = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ + % {'type':'|'.join(f77_type)}, re.I+re.M) + include_pat = re.compile(r"\s+include\s+[\'\"]([\w\./]*)") + + assert isinstance(lines, list) + sol = [] + + if mode is None or 'vector.inc' in mode: + search = True + for i,line in enumerate(lines[:]): + if search and re.search(include_pat, line): + name = re.findall(include_pat, line)[0] + misc.sprint('DETECTED INCLUDE', name) + if 'vector.inc' in name: + search = False + if 'run.inc' in name: + sol.append(" include 'vector.inc'") + search = False + sol.append(line) + if re.search(function_pat, line): + misc.sprint("DETECTED FCT") + search = True + return sol def guess_entry_fromname(self, name, value): """ @@ -3346,7 +3399,7 @@ def update_typelist(value, name, opts): #handle metadata opts = {} forced_opts = [] - for key,val in re.findall("\<(?P[_\-\w]+)\=(?P[^>]*)\>", str(name)): + for key,val in re.findall(r"\<(?P[_\-\w]+)\=(?P[^>]*)\>", str(name)): forced_opts.append(key) if val in ['True', 'False']: opts[key] = eval(val) @@ -3681,11 +3734,22 @@ def write_autodef(self, output_dir, output_file=None): out = ["%s\n" %l for l in out] fsock.writelines(out) - @staticmethod - def get_idbmup(lpp): + def get_idbmup(self, lpp, beam=1): """return the particle colliding pdg code""" if lpp in (1,2, -1,-2): - return math.copysign(2212, lpp) + target = 2212 + if 'nb_proton1' in self: + nbp = self['nb_proton%s' % beam] + nbn = self['nb_neutron%s' % beam] + if nbp == 1 and nbn ==0: + target = 2212 + elif nbp==0 and nbn ==1: + target = 2112 + else: + target = 1000000000 + target += 10 * (nbp+nbn) + target += 10000 * nbp + return math.copysign(target, lpp) elif lpp in (3,-3): return math.copysign(11, lpp) elif lpp in (4,-4): @@ -3701,8 +3765,8 @@ def get_banner_init_information(self): the first line of the block of the lhe file.""" output = {} - output["idbmup1"] = self.get_idbmup(self['lpp1']) - output["idbmup2"] = self.get_idbmup(self['lpp2']) + output["idbmup1"] = self.get_idbmup(self['lpp1'], beam=1) + output["idbmup2"] = self.get_idbmup(self['lpp2'], beam=2) output["ebmup1"] = self["ebeam1"] output["ebmup2"] = self["ebeam2"] output["pdfgup1"] = 0 @@ -3959,7 +4023,8 @@ def check_validity(self, card): dict.__setitem__(card, 'pdlabel1', card['pdlabel']) dict.__setitem__(card, 'pdlabel2', card['pdlabel']) - if abs(card['lpp1']) == 1 == abs(card['lpp2']) and card['pdlabel1'] != card['pdlabel2']: + if isinstance(card['lpp1'],int) and isinstance(card['lpp2'],int) and \ + abs(card['lpp1']) == 1 == abs(card['lpp2']) and card['pdlabel1'] != card['pdlabel2']: raise InvalidRunCard("Assymetric beam pdf not supported for proton-proton collision") def status(self, card): @@ -4156,12 +4221,16 @@ def default_setup(self): self.add_param('frame_id', 6, system=True) self.add_param("event_norm", "average", allowed=['sum','average', 'unity'], include=False, sys_default='sum', hidden=True) + self.add_param("keep_log", "normal", include=False, hidden=True, + comment="none: all log send to /dev/null.\n minimal: keep only log for survey of the last run.\n normal: keep only log for survey of all run. 
\n debug: keep all log (survey and refine)", + allowed=['none', 'minimal', 'normal', 'debug']) #cut self.add_param("auto_ptj_mjj", True, hidden=True) self.add_param("bwcutoff", 15.0) self.add_param("cut_decays", False, cut='d') self.add_param('dsqrt_shat',0., cut=True) self.add_param("nhel", 0, include=False) + self.add_param("limhel", 1e-8, hidden=True, comment="threshold to determine if an helicity contributes when not MC over helicity.") #pt cut self.add_param("ptj", 20.0, cut='j') self.add_param("ptb", 0.0, cut='b') @@ -4842,7 +4911,7 @@ def create_default_for_process(self, proc_characteristic, history, proc_def): # here pick strategy 2 if only one QCD color flow # and for pure multi-jet case jet_id = [21] + list(range(1, self['maxjetflavor']+1)) - if proc_characteristic['single_color']: + if proc_characteristic['gauge'] != 'FD' and proc_characteristic['single_color']: self['sde_strategy'] = 2 #for pure lepton final state go back to sde_strategy=1 pure_lepton=True @@ -5741,9 +5810,10 @@ def check_validity(self): # check that ebeam is bigger than the proton mass. for i in [1,2]: - if self['lpp%s' % i ] not in [1,2]: + # do not for proton mass if not proton PDF (or when scan initialization) + if self['lpp%s' % i ] not in [1,2] or isinstance(self['ebeam%i' % i], str): continue - + if self['ebeam%i' % i] < 0.938: if self['ebeam%i' %i] == 0: logger.warning("At-rest proton mode set: energy beam set to 0.938 GeV") @@ -6193,3 +6263,181 @@ def log_and_update(self, banner, card, par, v): xcard = banner.charge_card(card) xcard[par[0]].param_dict[(par[1],)].value = v xcard.write(os.path.join(self.me_dir, 'Cards', '%s.dat' % card)) + + + + +class RunCardIterator(object): + """A class keeping track of the scan: flag in the param_card and + having an __iter__() function to scan over all the points of the scan. + """ + + logging = True + def __init__(self, input_path=None): + with misc.TMP_variable(RunCard, 'allow_scan', True): + self.run_card = RunCard(input_path, consistency=False) + self.run_card.allow_scan = True + + self.itertag = [] #all the current value use + self.cross = [] # keep track of all the cross-section computed + self.param_order = [] + + def __iter__(self): + """generate the next param_card (in a abstract way) related to the scan. + Technically this generates only the generator.""" + + if hasattr(self, 'iterator'): + return self.iterator + self.iterator = self.iterate() + return self.iterator + + def write(self, path): + self.__iter__.write(path) + + def next(self, autostart=False): + """call the next iteration value""" + try: + iterator = self.iterator + except: + if autostart: + iterator = self.__iter__() + else: + raise + try: + out = next(iterator) + except StopIteration: + del self.iterator + raise + return out + + def iterate(self): + """create the actual generator""" + all_iterators = {} # dictionary of key -> block of object to scan [([param, [values]), ...] 
+ pattern = re.compile(r'''scan\s*(?P\d*)\s*:\s*(?P[^#]*)''', re.I) + + # fill all_iterators with the run_card information + for name in self.run_card.scan_set: + value = self.run_card[name] + try: + key, def_list = pattern.findall(value)[0] + except Exception as error: + misc.sprint(error) + raise Exception("Fail to handle scanning tag in run_card: Please check that the syntax is valid") + if key == '': + key = -1 * len(all_iterators) + if key not in all_iterators: + all_iterators[key] = [] + try: + all_iterators[key].append( (name, eval(def_list))) + except SyntaxError as error: + raise Exception("Fail to handle your scan definition. Please check your syntax:\n entry: %s \n Error reported: %s" %(def_list, error)) + + #prepare to keep track of parameter changing for the report + keys = list(all_iterators.keys()) # need to fix an order for the scan + #store the type of parameter + for key in keys: + for param, values in all_iterators[key]: + self.param_order.append("run_card#%s" % (param)) + + # do the loop + lengths = [list(range(len(all_iterators[key][0][1]))) for key in keys] + from functools import reduce + total = reduce((lambda x, y: x * y),[len(x) for x in lengths]) + for i,positions in enumerate(itertools.product(*lengths)): + self.itertag = [] + if self.logging: + logger.info("Create the next run_card in the scan definition (%s/%s) " %( i+1, total), '$MG:BOLD') + for i, pos in enumerate(positions): + key = keys[i] + for param, values in all_iterators[key]: + # assign the value in the card. + self.run_card[param] = values[pos] + self.itertag.append(values[pos]) + if self.logging: + logger.info("change parameter %s to %s", \ + param, values[pos]) + + + # retrun the current param_card up to next iteration + yield self.run_card + + + def store_entry(self, run_name, cross, error=None, run_card_path=None): + """store the value of the cross-section""" + + if isinstance(cross, dict): + info = dict(cross) + info.update({'bench' : self.itertag, 'run_name': run_name}) + self.cross.append(info) + else: + if error is None: + self.cross.append({'bench' : self.itertag, 'run_name': run_name, 'cross(pb)':cross}) + else: + self.cross.append({'bench' : self.itertag, 'run_name': run_name, 'cross(pb)':cross, 'error(pb)':error}) + + + def write_summary(self, path, order=None, lastline=False, nbcol=20): + """ """ + + if path: + ff = open(path, 'w') + path_events = path.rsplit("/", 1)[0] + #identCard = open(pjoin(path.rsplit("/", 2)[0], "Cards", "ident_card.dat")) + #identLines = identCard.readlines() + #identCard.close() + else: + ff = StringIO.StringIO() + if order: + keys = order + else: + keys = list(self.cross[0].keys()) + if 'bench' in keys: keys.remove('bench') + if 'run_name' in keys: keys.remove('run_name') + keys.sort() + if 'cross(pb)' in keys: + keys.remove('cross(pb)') + keys.append('cross(pb)') + if 'error(pb)' in keys: + keys.remove('error(pb)') + keys.append('error(pb)') + + formatting = "#%s%s%s\n" %('%%-%is ' % (nbcol-1), ('%%-%is ' % (nbcol))* len(self.param_order), + ('%%-%is ' % (nbcol))* len(keys)) + # header + if not lastline: + ff.write(formatting % tuple(['run_name'] + self.param_order + keys)) + formatting = "%s%s%s\n" %('%%-%is ' % (nbcol), ('%%-%ie ' % (nbcol))* len(self.param_order), + ('%%-%ie ' % (nbcol))* len(keys)) + + if not lastline: + to_print = self.cross + else: + to_print = self.cross[-1:] + for info in to_print: + name = info['run_name'] + bench = info['bench'] + data = [] + for k in keys: + if k in info: + data.append(info[k]) + else: + data.append(0.) 
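# [editor's note] Padding missing keys with 0. keeps every data row of the
# scan summary aligned with the header columns written above.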
+ ff.write(formatting % tuple([name] + bench + data)) + ff_single = open(pjoin(path_events, name, "params.dat"), "w") + for i_bench in range(0, len(bench)): + ff_single.write(self.param_order[i_bench] + " = " + str(bench[i_bench]) +"\n") + ff_single.close() + + if not path: + return ff.getvalue() + + + def get_next_name(self, run_name): + """returns a smart name for the next run""" + + if '_' in run_name: + name, value = run_name.rsplit('_',1) + if value.isdigit(): + return '%s_%02i' % (name, float(value)+1) + # no valid '_' in the name + return '%s_scan_02' % run_name diff --git a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/check_param_card.py b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/check_param_card.py index 71089d7480..bc785b5de6 100755 --- a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/check_param_card.py +++ b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/check_param_card.py @@ -649,7 +649,7 @@ def write_inc_file(self, outpath, identpath, default, need_mp=False): #check if we need to write the value of scale for some block if os.path.exists(input_inc): text = open(input_inc).read() - scales = list(set(re.findall('mdl__(\w*)__scale', text, re.I))) + scales = list(set(re.findall(r'mdl__(\w*)__scale', text, re.I))) else: scales = [] @@ -1000,10 +1000,12 @@ def iterate(self): self.param_order.append("%s#%s" % (param.lhablock, '_'.join(repr(i) for i in param.lhacode))) # do the loop lengths = [list(range(len(all_iterators[key][0][1]))) for key in keys] - for positions in itertools.product(*lengths): + from functools import reduce + total = reduce((lambda x, y: x * y),[len(x) for x in lengths]) + for i,positions in enumerate(itertools.product(*lengths)): self.itertag = [] if self.logging: - logger.info("Create the next param_card in the scan definition", '$MG:BOLD') + logger.info("Create the next param_card in the scan definition (%s/%s)" % (i+1,total), '$MG:BOLD') for i, pos in enumerate(positions): key = keys[i] for param, values in all_iterators[key]: diff --git a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/cluster.py b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/cluster.py index 9a893f630d..1ad860e04f 100755 --- a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/cluster.py +++ b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/cluster.py @@ -646,7 +646,10 @@ def worker(self): if os.path.exists(exe) and not exe.startswith('/'): exe = './' + exe if isinstance(opt['stdout'],str): - opt['stdout'] = open(opt['stdout'],'w') + if opt['stdout'] == '/dev/null': + opt['stdout'] = os.open(os.devnull, os.O_RDWR) + else: + opt['stdout'] = open(opt['stdout'],'w') if opt['stderr'] == None: opt['stderr'] = subprocess.STDOUT if arg: @@ -671,11 +674,12 @@ def worker(self): self.pids.put(pid) # the function should return 0 if everything is fine # the error message otherwise - returncode = exe(*arg, **opt) - if returncode != 0: - logger.warning("fct %s does not return 0. Stopping the code in a clean way. The error was:\n%s", exe, returncode) + try: + returncode = exe(*arg, **opt) + except Exception as error: + #logger.warning("fct %s does not return 0. Stopping the code in a clean way. 
The error was:\n%s", exe, returncode) self.stoprequest.set() - self.remove("fct %s does not return 0:\n %s" % (exe, returncode)) + self.remove("fct %s does raise %s" % (exe, error)) except Exception as error: self.fail_msg = sys.exc_info() logger.warning(str(error)) @@ -700,7 +704,7 @@ def worker(self): def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, - log=None, required_output=[], nb_submit=0): + log=None, required_output=[], nb_submit=0, python_opts={}): """submit a job on multicore machine""" # open threads if needed @@ -720,7 +724,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, return tag else: # python function - self.queue.put((tag, prog, argument, {})) + self.queue.put((tag, prog, argument, python_opts)) self.submitted.put(1) return tag @@ -908,6 +912,10 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None else: requirement = '' + if 'cluster_walltime' in self.options and self.options['cluster_walltime']\ + and self.options['cluster_walltime'] != 'None': + requirement+='\n MaxRuntime = %s' % self.options['cluster_walltime'] + if cwd is None: cwd = os.getcwd() if stdout is None: @@ -936,7 +944,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None #Submitting job(s). #Logging submit event(s). #1 job(s) submitted to cluster 2253622. - pat = re.compile("submitted to cluster (\d*)",re.MULTILINE) + pat = re.compile(r"submitted to cluster (\d*)",re.MULTILINE) output = output.decode(errors='ignore') try: id = pat.search(output).groups()[0] @@ -1025,7 +1033,7 @@ def submit2(self, prog, argument=[], cwd=None, stdout=None, stderr=None, #Logging submit event(s). #1 job(s) submitted to cluster 2253622. output = output.decode(errors='ignore') - pat = re.compile("submitted to cluster (\d*)",re.MULTILINE) + pat = re.compile(r"submitted to cluster (\d*)",re.MULTILINE) try: id = pat.search(output).groups()[0] except: @@ -1588,7 +1596,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None output = a.communicate()[0].decode(errors='ignore') #Your job 874511 ("test.sh") has been submitted - pat = re.compile("Your job (\d*) \(",re.MULTILINE) + pat = re.compile(r"Your job (\d*) \(",re.MULTILINE) try: id = pat.search(output).groups()[0] except: @@ -1606,7 +1614,7 @@ def control_one_job(self, id): if not status: return 'F' #874516 0.00000 test.sh alwall qw 03/04/2012 22:30:35 1 - pat = re.compile("^(\d+)\s+[\d\.]+\s+[\w\d\.]+\s+[\w\d\.]+\s+(\w+)\s") + pat = re.compile(r"^(\d+)\s+[\d\.]+\s+[\w\d\.]+\s+[\w\d\.]+\s+(\w+)\s") stat = '' for line in status.stdout.read().decode(errors='ignore').split('\n'): if not line: @@ -1636,7 +1644,7 @@ def control(self, me_dir=None): cmd = 'qstat -s %s' % statusflag status = misc.Popen([cmd], shell=True, stdout=subprocess.PIPE) #874516 0.00000 test.sh alwall qw 03/04/2012 22:30:35 1 - pat = re.compile("^(\d+)") + pat = re.compile(r"^(\d+)") for line in status.stdout.read().decode(errors='ignore').split('\n'): line = line.strip() try: @@ -1715,6 +1723,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None stderr = stdout if log is None: log = '/dev/null' + command = ['sbatch', '-o', stdout, '-J', me_dir, @@ -1726,6 +1735,12 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None command.insert(1, '-p') command.insert(2, self.cluster_queue) + if 'cluster_walltime' in self.options and self.options['cluster_walltime']\ + and self.options['cluster_walltime'] != 'None': + 
command.insert(1, '-t') + command.insert(2, self.options['cluster_walltime']) + + a = misc.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, @@ -1736,7 +1751,7 @@ def submit(self, prog, argument=[], cwd=None, stdout=None, stderr=None, log=None id = output_arr[3].rstrip() if not id.isdigit(): - id = re.findall('Submitted batch job ([\d\.]+)', ' '.join(output_arr)) + id = re.findall(r'Submitted batch job ([\d\.]+)', ' '.join(output_arr)) if not id or len(id)>1: raise ClusterManagmentError( 'fail to submit to the cluster: \n%s' \ diff --git a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/combine_runs.py b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/combine_runs.py index 4de6b84ec0..b1e8c88eac 100755 --- a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/combine_runs.py +++ b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/combine_runs.py @@ -20,6 +20,7 @@ from __future__ import absolute_import import math import os +import shutil import re import logging from six.moves import range @@ -117,6 +118,7 @@ def sum_multichannel(self, channel): #Now read in all of the events and write them #back out with the appropriate scaled weight + to_clean = [] fsock = open(pjoin(channel, 'events.lhe'), 'w') wgt = results.axsec / results.nunwgt tot_nevents, nb_file = 0, 0 @@ -129,8 +131,14 @@ def sum_multichannel(self, channel): nw = self.copy_events(fsock, pjoin(path,'events.lhe'), wgt) tot_nevents += nw nb_file += 1 + to_clean.append(path) logger.debug("Combined %s file generating %s events for %s " , nb_file, tot_nevents, channel) - + for path in to_clean: + try: + shutil.rmtree(path) + except Exception as error: + pass + @staticmethod def get_fortran_str(nb): data = '%E' % nb @@ -162,6 +170,7 @@ def copy_events(self, fsock, input, new_wgt): fsock.write(line) old_line = line return nb_evt + def get_channels(self, proc_path): """Opens file symfact.dat to determine all channels""" sympath = os.path.join(proc_path, 'symfact.dat') diff --git a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/common_run_interface.py b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/common_run_interface.py index 9bd9d9cb50..194f0cdfbd 100755 --- a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/common_run_interface.py +++ b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/common_run_interface.py @@ -23,6 +23,7 @@ import ast import logging import math +import copy import os import re import shutil @@ -181,6 +182,23 @@ def help_add_time_of_flight(self): logger.info(' threshold option allows to change the minimal value required to') logger.info(' a non zero value for the particle (default:1e-12s)') + def help_print_results(self): + logger.info("syntax: print_results [RUN_NAME] [OPTIONS]") + logger.info("-- print the results of the previous run on the screen") + logger.info(" If no RUN_NAME is provided, the information of all runs") + logger.info(" is printed one after another.") + logger.info("") + logger.info(" supported options:") + logger.info(" ------------------") + logger.info(" --format=full|short # default is full") + logger.info(" full format contains banner/... 
") + logger.info(" while short is a simple multi-column format (nice for plotting)") + logger.info(" --path=") + logger.info(" allow to write the information to a file.") + logger.info(" --mode=w|a #default is w ") + logger.info(" when the information is printed to a file, you can choose ") + logger.info(" to either overwrite the file if already exists (w mode)") + logger.info(" to append the information at the end of the file (a mode)") class CheckValidForCmd(object): @@ -727,7 +745,7 @@ def __init__(self, me_dir, options, *args, **opts): if not self.proc_characteristics['ninitial']: # Get number of initial states nexternal = open(pjoin(self.me_dir,'Source','nexternal.inc')).read() - found = re.search("PARAMETER\s*\(NINCOMING=(\d)\)", nexternal) + found = re.search(r"PARAMETER\s*\(NINCOMING=(\d)\)", nexternal) self.ninitial = int(found.group(1)) else: self.ninitial = self.proc_characteristics['ninitial'] @@ -989,7 +1007,22 @@ def do_treatcards(self, line, amcatnlo=False): #raise Exception, "%s %s %s" % (sys.path, os.path.exists(pjoin(self.me_dir,'bin','internal', 'ufomodel')), os.listdir(pjoin(self.me_dir,'bin','internal', 'ufomodel'))) import ufomodel as ufomodel zero = ufomodel.parameters.ZERO - if self.proc_characteristics['nlo_mixed_expansion']: + no_width = [] + + if self.proc_characteristics['ew_sudakov']: + # if the sudakov approximation is used, force all particle widths to zero + # unless the complex mass scheme is used + if not self.proc_characteristics['complex_mass_scheme']: + no_width = [p for p in ufomodel.all_particles if p.width != zero] + logger.info('''Setting all particle widths to zero (needed for EW Sudakov approximation).''','$MG:BOLD') + # also, check that the model features the 'ntadpole' parameter, and set it to 1 + try: + param_card['tadpole'].get(1).value = 1. + logger.info('''Setting the value of ntadpole to 1 (needed for EW Sudakov approximation).''','$MG:BOLD') + except KeyError: + logger.warning('''The model has no 'ntadpole' parameter. 
The Sudakov approximation for EW corrections may give wrong results.''') + + elif self.proc_characteristics['nlo_mixed_expansion']: no_width = [p for p in ufomodel.all_particles if (str(p.pdg_code) in pids or str(-p.pdg_code) in pids) and p.width != zero] @@ -1168,17 +1201,17 @@ def detect_card_type(path): 'Begin Minpts', 'gridpack', 'ebeam1', - 'block\s+mw_run', + r'block\s+mw_run', 'BLOCK', 'DECAY', 'launch', 'madspin', - 'transfer_card\.dat', + r'transfer_card\.dat', 'set', 'main:numberofevents', # pythia8, '@MG5aMC skip_analysis', #MA5 --both-- - '@MG5aMC\s*inputs\s*=\s*\*\.(?:hepmc|lhe)', #MA5 --both-- - '@MG5aMC\s*reconstruction_name', # MA5 hadronique + r'@MG5aMC\s*inputs\s*=\s*\*\.(?:hepmc|lhe)', #MA5 --both-- + r'@MG5aMC\s*reconstruction_name', # MA5 hadronique '@MG5aMC', # MA5 hadronique 'run_rivet_later', # Rivet ] @@ -1237,7 +1270,7 @@ def detect_card_type(path): return 'madspin_card.dat' if 'decay' in text: # need to check if this a line like "decay w+" or "set decay" - if re.search("(^|;)\s*decay", fulltext, re.M): + if re.search(r"(^|;)\s*decay", fulltext, re.M): return 'madspin_card.dat' else: return 'reweight_card.dat' @@ -2074,6 +2107,12 @@ def check_multicore(self): # ensure that the run_card is present if not hasattr(self, 'run_card'): self.run_card = banner_mod.RunCard(pjoin(self.me_dir, 'Cards', 'run_card.dat')) + # Below reads the run_card in the LHE file rather than the Cards/run_card + import madgraph.various.lhe_parser as lhe_parser + args_path = list(args) + self.check_decay_events(args_path) + self.run_card = banner_mod.RunCard(lhe_parser.EventFile(args_path[0]).get_banner()['mgruncard']) + # we want to run this in a separate shell to avoid hard f2py crash command = [sys.executable] @@ -2085,6 +2124,12 @@ def check_multicore(self): command.append('--web') command.append('reweight') + ## TV: copy the event file as backup before starting reweighting + event_path = pjoin(self.me_dir, 'Events', self.run_name, 'events.lhe.gz') + event_path_backup = pjoin(self.me_dir, 'Events', self.run_name, 'events_orig.lhe.gz') + if os.path.exists(event_path) and not os.path.exists(event_path_backup): + shutil.copyfile(event_path, event_path_backup) + ######### START SINGLE CORE MODE ############ if self.options['nb_core']==1 or self.run_card['nevents'] < 101 or not check_multicore(self): if self.run_name: @@ -2200,7 +2245,7 @@ def check_multicore(self): cross_sections[key] = value / (nb_event+1) lhe.remove() for key in cross_sections: - if key == 'orig' or key.isdigit(): + if key == 'orig' or (key.isdigit() and not (key[0] == '2')): continue logger.info('%s : %s pb' % (key, cross_sections[key])) return @@ -2461,7 +2506,7 @@ def do_add_time_of_flight(self, line): ############################################################################ def do_print_results(self, line): - """Not in help:Print the cross-section/ number of events for a given run""" + """Print the cross-section/ number of events for a given run""" args = self.split_arg(line) options={'path':None, 'mode':'w', 'format':'full'} @@ -2942,10 +2987,15 @@ def do_rivet(self, line, postprocess=False): #2 Prepare Rivet setup environments rivet_path = self.options['rivet_path'] yoda_path = self.options['yoda_path'] + fastjet_path = subprocess.Popen([self.options['fastjet'], '--prefix'], + stdout = subprocess.PIPE).stdout.read().decode(errors='ignore').strip() + set_env = set_env + "export PATH={0}:$PATH\n".format(pjoin(rivet_path, 'bin')) set_env = set_env + "export PATH={0}:$PATH\n".format(pjoin(yoda_path, 'bin')) set_env = 
set_env + "export LD_LIBRARY_PATH={0}:{1}:$LD_LIBRARY_PATH\n".format(pjoin(rivet_path, 'lib'), pjoin(rivet_path, 'lib64')) set_env = set_env + "export LD_LIBRARY_PATH={0}:{1}:$LD_LIBRARY_PATH\n".format(pjoin(yoda_path, 'lib'), pjoin(yoda_path, 'lib64')) + set_env = set_env + "export LD_LIBRARY_PATH={0}:$LD_LIBRARY_PATH\n".format(pjoin(self.options['hepmc_path'], 'lib')) + set_env = set_env + "export LD_LIBRARY_PATH={0}:$LD_LIBRARY_PATH\n".format(pjoin(fastjet_path, 'lib')) major, minor = sys.version_info[0:2] set_env = set_env + "export PYTHONPATH={0}:{1}:$PYTHONPATH\n".format(pjoin(rivet_path, 'lib', 'python%s.%s' %(major,minor), 'site-packages'),\ pjoin(rivet_path, 'lib64', 'python%s.%s' %(major,minor), 'site-packages')) @@ -4311,8 +4361,8 @@ def complete_compute_widths(self, text, line, begidx, endidx, formatting=True): else: completion = {} completion['options'] = self.list_completion(text, - ['--path=', '--output=', '--min_br=0.\$', '--nlo', - '--precision_channel=0.\$', '--body_decay=']) + ['--path=', '--output=', r'--min_br=0.\$', '--nlo', + r'--precision_channel=0.\$', '--body_decay=']) return self.deal_multiple_categories(completion, formatting) @@ -4821,9 +4871,9 @@ class AskforEditCard(cmd.OneLinePathCompletion): (return False to repeat the question) """ - all_card_name = ['param_card', 'run_card', 'pythia_card', 'pythia8_card', + all_card_name = ['param_card', 'run_card', 'pythia_card', 'pythia8_card', 'fo_analysis_card' 'madweight_card', 'MadLoopParams', 'shower_card', 'rivet_card'] - to_init_card = ['param', 'run', 'madweight', 'madloop', + to_init_card = ['param', 'run', 'madweight', 'madloop', 'fo_analysis', 'shower', 'pythia8','delphes','madspin', 'rivet'] special_shortcut = {} special_shortcut_help = {} @@ -4853,6 +4903,7 @@ def load_default(self): self.has_PY8 = False self.has_delphes = False self.has_rivet = False + self.has_fo_card = False self.paths = {} self.update_block = [] @@ -5058,7 +5109,8 @@ def init_run(self, cards): try: - self.run_card = banner_mod.RunCard(self.paths['run'], consistency='warning') + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + self.run_card = banner_mod.RunCard(self.paths['run'], consistency='warning') except IOError: self.run_card = {} try: @@ -5248,6 +5300,15 @@ def init_delphes(self, cards): self.has_delphes = True return [] + def init_fo_analysis(self, cards): + self.has_fo_card = False + if not self.get_path('FO_analyse', cards): + return [] + self.has_fo_card = True + self.fo_card = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse']) + self.fo_card_def = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse_default']) + return list(self.fo_card.string_vars) + def set_CM_velocity(self, line): """compute sqrts from the velocity in the center of mass frame""" @@ -5508,6 +5569,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed['delphes_card'] = '' if self.has_rivet: allowed['rivet_card'] = '' + if self.has_fo_card: + allowed['fo_card'] = '' elif len(args) == 2: if args[1] == 'run_card': @@ -5532,6 +5595,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): allowed = {'delphes_card':'default'} elif args[1] == 'rivet_card': allowed = {'rivet_card':'default'} + elif args[1] == 'fo_card': + allowed = {'fo_card':'default'} else: allowed = {'value':''} @@ -5539,6 +5604,7 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): start = 1 if args[1] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', 
'MadLoop_card','pythia8_card','delphes_card','plot_card', + 'fo_card', 'madanalysis5_parton_card','madanalysis5_hadron_card', 'rivet_card']: start = 2 @@ -5576,6 +5642,8 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): categories.append('delphes_card') if self.has_rivet: categories.append('rivet_card') + if self.has_fo_card: + categories.append('fo_card') possibilities['category of parameter (optional)'] = \ self.list_completion(text, categories) @@ -5630,7 +5698,13 @@ def complete_set(self, text, line, begidx, endidx, formatting=True): if 'delphes_card' in allowed: if allowed['delphes_card'] == 'default': opts = ['default', 'atlas', 'cms'] - possibilities['Delphes Card'] = self.list_completion(text, opts) + possibilities['Delphes Card'] = self.list_completion(text, opts) + + if 'fo_card' in allowed: + opts = self.fo_card.string_vars + if allowed['fo_card'] == 'default': + opts.append('default') + possibilities['FO Card'] = self.list_completion(text, opts) if 'value' in list(allowed.keys()): opts = ['default', 'scale'] @@ -5733,21 +5807,28 @@ def do_set(self, line): if args[0] in self.special_shortcut: targettypes , cmd = self.special_shortcut[args[0]] if len(args) != len(targettypes) +1: - logger.warning('shortcut %s requires %s argument' % (args[0], len(targettypes))) - if len(args) < len(targettypes) +1: - return + if len(targettypes) == 1 and args[len(targettypes)].startswith('scan'): + args = args[:len(targettypes)] + [' '.join(args[len(targettypes):])] + targettypes = [str] else: - logger.warning('additional argument will be ignored') + logger.warning('shortcut %s requires %s argument' % (args[0], len(targettypes))) + if len(args) < len(targettypes) +1: + return + else: + logger.warning('additional argument will be ignored') values ={} for i, argtype in enumerate(targettypes): try: - values = {str(i): banner_mod.ConfigFile.format_variable(args[i+1], argtype, args[0])} + values[str(i)] = banner_mod.ConfigFile.format_variable(args[i+1], argtype, args[0]) except ValueError as e: logger.warning("Wrong argument: The entry #%s should be of type %s.", i+1, argtype) return except InvalidCmd as e: - logger.warning(str(e)) - return + if isinstance(args[i+1], str) and args[i+1].startswith('scan'): + values[str(i)] = args[i+1] + else: + logger.warning(str(e)) + return #else: # logger.warning("too many argument for this command") # return @@ -5787,7 +5868,7 @@ def do_set(self, line): if os.path.exists(pythia_path): logger.info('add line QCUT = %s in pythia_card.dat' % args[1]) p_card = open(pythia_path,'r').read() - p_card, n = re.subn('''^\s*QCUT\s*=\s*[\de\+\-\.]*\s*$''', + p_card, n = re.subn(r'''^\s*QCUT\s*=\s*[\de\+\-\.]*\s*$''', ''' QCUT = %s ''' % args[1], \ p_card, flags=(re.M+re.I)) if n==0: @@ -5801,7 +5882,7 @@ def do_set(self, line): if os.path.exists(pythia_path): logger.info('add line SHOWERKT = %s in pythia_card.dat' % args[1].upper()) p_card = open(pythia_path,'r').read() - p_card, n = re.subn('''^\s*SHOWERKT\s*=\s*[default\de\+\-\.]*\s*$''', + p_card, n = re.subn(r'''^\s*SHOWERKT\s*=\s*[default\de\+\-\.]*\s*$''', ''' SHOWERKT = %s ''' % args[1].upper(), \ p_card, flags=(re.M+re.I)) if n==0: @@ -5856,7 +5937,7 @@ def do_set(self, line): pjoin(self.me_dir,'Cards', 'delphes_card.dat')) return - if args[0] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', + if args[0] in ['run_card', 'param_card', 'MadWeight_card', 'shower_card', 'fo_card', 'delphes_card','madanalysis5_hadron_card','madanalysis5_parton_card','rivet_card']: if args[1] == 'default': 
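As an aside, the grouped scan expansion that the run_card iterator and the 'set ... scan:' shortcut above rely on can be summarised by the following minimal, standalone Python sketch (the parameter names and values here are illustrative only, not taken from the patch):

    import itertools

    # Parameters sharing a scan id (e.g. "scan1: ...") advance together;
    # plain "scan: ..." entries get their own negative key and form an
    # independent axis of the Cartesian product.
    all_iterators = {
        1: [('ebeam1', [250.0, 500.0]), ('ebeam2', [250.0, 500.0])],  # scan1: coupled
        -1: [('nevents', [1000, 2000, 5000])],                        # scan: independent
    }
    keys = list(all_iterators.keys())
    lengths = [range(len(all_iterators[key][0][1])) for key in keys]
    for positions in itertools.product(*lengths):
        point = {}
        for key, pos in zip(keys, positions):
            for name, values in all_iterators[key]:
                point[name] = values[pos]
        print(point)  # 2 x 3 = 6 run configurations in total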
@@ -6176,6 +6257,22 @@ def do_set(self, line): self.setRivet(args[start], value, default=default) self.rivet_card.write(self.paths['rivet'], self.paths['rivet_default']) + elif self.has_fo_card and (card in ['', 'fo_card'])\ + and args[start].lower() in [k.lower() for k in self.fo_card.string_vars]: + + if args[start] in self.conflict and card == '': + text = 'ambiguous name (present in more than one card). Please specify which card to edit' + logger.warning(text) + return + if args[start+1] == 'default': + value = self.fo_card_def[args[start]] + default = True + else: + value = args[start+1] + default = False + self.fo_card[args[start]] = value + self.modified_card.add('fo_card') + #INVALID -------------------------------------------------------------- else: logger.warning('invalid set command %s ' % line) @@ -6222,12 +6319,13 @@ def setM(self, block, name, value): def setR(self, name, value): - if self.mother_interface.inputfile: - self.run_card.set(name, value, user=True, raiseerror=True) - else: - self.run_card.set(name, value, user=True) - new_value = self.run_card.get(name) - logger.info('modify parameter %s of the run_card.dat to %s' % (name, new_value),'$MG:BOLD') + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + if self.mother_interface.inputfile: + self.run_card.set(name, value, user=True, raiseerror=True) + else: + self.run_card.set(name, value, user=True) + new_value = self.run_card.get(name) + logger.info('modify parameter %s of the run_card.dat to %s' % (name, new_value),'$MG:BOLD') def setML(self, name, value, default=False): @@ -6314,6 +6412,7 @@ def check_card_consistency(self): proc_charac = self.mother_interface.proc_characteristics if proc_charac['grouped_matrix'] and \ + isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int) and \ abs(self.run_card['lpp1']) == 1 == abs(self.run_card['lpp2']) and \ (self.run_card['nb_proton1'] != self.run_card['nb_proton2'] or self.run_card['nb_neutron1'] != self.run_card['nb_neutron2'] or @@ -6403,41 +6502,42 @@ def check_card_consistency(self): # check that only quark/gluon/photon are in initial beam if lpp=+-1 pdg_in_p = list(range(-6,7))+[21,22] - if (abs(self.run_card['lpp1'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial1'])) \ + if isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int): + if(abs(self.run_card['lpp1'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial1'])) \ or (abs(self.run_card['lpp2'])==1 and any(pdg not in pdg_in_p for pdg in proc_charac['pdg_initial2'])): - if 'pythia_card.dat' in self.cards or 'pythia8_card.dat' in self.cards: - path_to_remove = None - if 'pythia_card.dat' in self.cards: - path_to_remove = self.paths['pythia'] - card_to_remove = 'pythia_card.dat' - elif 'pythia8_card.dat' in self.cards: - path_to_remove = self.paths['pythia8'] - card_to_remove = 'pythia8_card.dat' - if path_to_remove: - if 'partonshower' in self.run_card['bypass_check']: + if 'pythia_card.dat' in self.cards or 'pythia8_card.dat' in self.cards: + path_to_remove = None + if 'pythia_card.dat' in self.cards: + path_to_remove = self.paths['pythia'] + card_to_remove = 'pythia_card.dat' + elif 'pythia8_card.dat' in self.cards: + path_to_remove = self.paths['pythia8'] + card_to_remove = 'pythia8_card.dat' + if path_to_remove: + if 'partonshower' in self.run_card['bypass_check']: + logger.warning("forcing to keep parton-shower run while possibly not fully consistent... 
please be carefull") + else: + logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.') + os.remove(path_to_remove) + self.cards.remove(card_to_remove) + else: + logger.info('Remember that Parton-Shower are not yet ready for such proton component definition (HW implementation in progress).', '$MG:BOLD' ) + elif (abs(self.run_card['lpp1'])==3 and abs(self.run_card['lpp2'])==3): + if 'pythia8_card.dat' in self.cards: + if self.run_card['pdlabel'] == 'isronlyll': + if 'partonshower' not in self.run_card['bypass_check']: + # force that QED shower is on? + for param in ['TimeShower:QEDshowerByQ', 'TimeShower:QEDshowerByL', 'TimeShower:QEDshowerByGamma', 'SpaceShower:QEDshowerByQ', 'SpaceShower:QEDshowerByL']: + if param not in self.PY8Card or \ + (not self.PY8Card[param] and param.lower() not in self.PY8Card.user_set): + logger.warning('Activating QED shower: setting %s to True', param) + self.PY8Card[param] = True + elif 'partonshower' in self.run_card['bypass_check']: logger.warning("forcing to keep parton-shower run while possibly not fully consistent... please be carefull") - else: + else: logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.') - os.remove(path_to_remove) - self.cards.remove(card_to_remove) - else: - logger.info('Remember that Parton-Shower are not yet ready for such proton component definition (HW implementation in progress).', '$MG:BOLD' ) - elif (abs(self.run_card['lpp1'])==3 and abs(self.run_card['lpp2'])==3): - if 'pythia8_card.dat' in self.cards: - if self.run_card['pdlabel'] == 'isronlyll': - if 'partonshower' not in self.run_card['bypass_check']: - # force that QED shower is on? - for param in ['TimeShower:QEDshowerByQ', 'TimeShower:QEDshowerByL', 'TimeShower:QEDshowerByGamma', 'SpaceShower:QEDshowerByQ', 'SpaceShower:QEDshowerByL']: - if param not in self.PY8Card or \ - (not self.PY8Card[param] and param.lower() not in self.PY8Card.user_set): - logger.warning('Activating QED shower: setting %s to True', param) - self.PY8Card[param] = True - elif 'partonshower' in self.run_card['bypass_check']: - logger.warning("forcing to keep parton-shower run while possibly not fully consistent... please be carefull") - else: - logger.error('Parton-Shower are not yet ready for such proton component definition. Parton-shower will be switched off.') - os.remove(self.paths['pythia8']) - self.cards.remove('pythia8_card.dat') + os.remove(self.paths['pythia8']) + self.cards.remove('pythia8_card.dat') ######################################################################## @@ -6514,7 +6614,8 @@ def check_card_consistency(self): #check relation between lepton PDF // dressed lepton collisions // ... 
- if abs(self.run_card['lpp1']) != 1 or abs(self.run_card['lpp2']) != 1: + if isinstance(self.run_card['lpp1'],int) and isinstance(self.run_card['lpp2'],int) and \ + (abs(self.run_card['lpp1']) != 1 or abs(self.run_card['lpp2']) != 1): if abs(self.run_card['lpp1']) == abs(self.run_card['lpp2']) == 3: # this can be dressed lepton or photon-flux if proc_charac['pdg_initial1'] in [[11],[-11]] and proc_charac['pdg_initial2'] in [[11],[-11]]: @@ -6732,7 +6833,11 @@ def write_card_param(self): """ write the param_card """ self.param_card.write(self.paths['param']) - + + def write_card_fo_card(self): + """ write the fo_card""" + self.fo_card.write_card_from_template(self.paths['FO_analyse'], self.paths['FO_analyse_default']) + @staticmethod def update_dependent(mecmd, me_dir, param_card, path ,timer=0, run_card=None, lhapdfconfig=None): @@ -7076,7 +7181,7 @@ def do_decay(self, line): #first find the particle particle = line.split('>')[0].strip() logger.info("change madspin_card to define the decay of %s: %s" %(particle, line.strip()), '$MG:BOLD') - particle = particle.replace('+','\+').replace('-','\-') + particle = particle.replace('+',r'\+').replace('-',r'\-') decay_pattern = re.compile(r"^\s*decay\s+%s\s*>[\s\w+-~]*?$" % particle, re.I+re.M) text= open(path).read() text = decay_pattern.sub('', text) @@ -7193,7 +7298,7 @@ def help_edit(self, prefix=True): logger.info( ' --clean remove all previously existing line in the file') logger.info( ' --comment_line="" comment all lines matching the regular expression') logger.info('') - logger.info(' Note: all regular-expression will be prefixed by ^\s*') + logger.info(r' Note: all regular-expression will be prefixed by ^\s*') logger.info('') logger.info( ' example: edit reweight --after_line="change mode\b" change model heft') logger.info( ' edit madspin --after_line="banner" change model XXXX') @@ -7314,7 +7419,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern=r'''replace_line=(?P<quote>["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[14:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[14:-1] for posline,l in enumerate(split): if re.search(pattern, l): break @@ -7344,7 +7449,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern=r'''comment_line=(?P<quote>["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[14:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[14:-1] nb_mod = 0 for posline,l in enumerate(split): if re.search(pattern, l): @@ -7366,7 +7471,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern=r'''before_line=(?P<quote>["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[13:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[13:-1] for posline,l in enumerate(split): if re.search(pattern, l): break @@ -7383,7 +7488,7 @@ def do_add(self, line): text = open(path).read() split = text.split('\n') search_pattern = r'''after_line=(?P<quote>["'])(?:(?=(\\?))\2.)*?\1''' - pattern = '^\s*' + re.search(search_pattern, line).group()[12:-1] + pattern = r'^\s*' + re.search(search_pattern, line).group()[12:-1] for posline,l in enumerate(split): if re.search(pattern, l): break @@ -7527,16 +7632,19 @@ def open_file(self, answer): answer = 'plot' else: answer = self.cards[int(answer)-self.integer_bias] - + path = '' if 'madweight' in answer: answer = answer.replace('madweight', 'MadWeight') elif 
'MadLoopParams' in answer: answer = self.paths['ML'] elif 'pythia8_card' in answer: answer = self.paths['pythia8'] + elif 'FO_analyse' in answer: + path = self.paths['FO_analyse'] + answer = 'fo_card' if os.path.exists(answer): path = answer - else: + elif not os.path.exists(path): if not '.dat' in answer and not '.lhco' in answer: if answer != 'trigger': path = self.paths[answer] @@ -7595,7 +7703,8 @@ def reload_card(self, path): logger.error('Please re-open the file and fix the problem.') logger.warning('using the \'set\' command without opening the file will discard all your manual change') elif path == self.paths['run']: - self.run_card = banner_mod.RunCard(path) + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + self.run_card = banner_mod.RunCard(path) elif path == self.paths['shower']: self.shower_card = shower_card_mod.ShowerCard(path) elif path == self.paths['ML']: @@ -7614,6 +7723,8 @@ def reload_card(self, path): except: import internal.madweight.Cards as mwcards self.mw_card = mwcards.Card(path) + elif path == self.paths['FO_analyse']: + self.fo_card = FO_analyse_card.FOAnalyseCard(self.paths['FO_analyse']) else: logger.debug('not keep in sync: %s', path) return path @@ -7629,6 +7740,9 @@ def scanparamcardhandling(input_path=lambda obj: pjoin(obj.me_dir, 'Cards', 'par iteratorclass=param_card_mod.ParamCardIterator, summaryorder=lambda obj: lambda:None, check_card=lambda obj: CommonRunCmd.static_check_param_card, + run_card_scan=False, + run_card_input= lambda obj: pjoin(obj.me_dir, 'Cards', 'run_card.dat'), + run_card_iteratorclass=banner_mod.RunCardIterator, ): """ This is a decorator for customizing/using scan over the param_card (or technically other) This should be use like this: @@ -7678,7 +7792,60 @@ def __enter__(self): def __exit__(self, ctype, value, traceback ): self.iterator.write(self.path) - def decorator(original_fct): + def scan_over_run_card(original_fct, obj, *args, **opts): + + if isinstance(input_path, str): + card_path = run_card_input + else: + card_path = run_card_input(obj) + + run_card_iterator = run_card_iteratorclass(card_path) + orig_card = copy.deepcopy(run_card_iterator.run_card) + if not run_card_iterator.run_card.scan_set: + return original_fct(obj, *args, **opts) + + + with restore_iterator(orig_card, card_path): + # this with statement ensure that the original card is restore + # whatever happens inside those block + + if not hasattr(obj, 'allow_notification_center'): + obj.allow_notification_center = False + with misc.TMP_variable(obj, 'allow_notification_center', False): + orig_name = get_run_name(obj) + if not orig_name and args[1]: + orig_name = args[1][0] + args = (args[0], args[1][1:]) + #orig_name = "scan_%s" % len(obj.results) + + try: + os.mkdir(pjoin(obj.me_dir, 'Events', orig_name)) + except Exception: + pass + next_name = orig_name + "_00" + + for i,card in enumerate(run_card_iterator): + card.write(card_path) + # still have to check for the auto-wdith + #if i !=0: + next_name = run_card_iterator.get_next_name(next_name) + set_run_name(obj)(next_name) + try: + original_fct(obj, *args, **opts) + except ignoreerror as error: + run_card_iterator.store_entry(next_name, {'exception': error}) + else: + run_card_iterator.store_entry(next_name, store_for_scan(obj)(), run_card_path=card_path) + + #param_card_iterator.write(card_path) #-> this is done by the with statement + name = misc.get_scan_name(orig_name, next_name) + path = result_path(obj) % name + logger.info("write scan results in %s" % path ,'$MG:BOLD') + order = 
summaryorder(obj)() + run_card_iterator.write_summary(path, order=order) + + + def decorator(original_fct): def new_fct(obj, *args, **opts): if isinstance(input_path, str): @@ -7702,8 +7869,13 @@ def new_fct(obj, *args, **opts): if not param_card_iterator: #first run of the function - original_fct(obj, *args, **opts) - return + if run_card_scan: + scan_over_run_card(original_fct, obj, *args, **opts) + return + else: + #first run of the function + original_fct(obj, *args, **opts) + return with restore_iterator(param_card_iterator, card_path): # this with statement ensure that the original card is restore diff --git a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/extended_cmd.py b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/extended_cmd.py index 2f37070580..789976beee 100755 --- a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/extended_cmd.py +++ b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/extended_cmd.py @@ -624,12 +624,12 @@ def complete(self, text, state): compfunc = self.completenames # correct wrong splittion with '\ ' - if line and begidx > 2 and line[begidx-2:begidx] == '\ ': + if line and begidx > 2 and line[begidx-2:begidx] == r'\ ': Ntext = line.split(os.path.sep)[-1] - self.completion_prefix = Ntext.rsplit('\ ', 1)[0] + '\ ' + self.completion_prefix = Ntext.rsplit(r'\ ', 1)[0] + r'\ ' to_rm = len(self.completion_prefix) - 1 Nbegidx = len(line.rsplit(os.path.sep, 1)[0]) + 1 - data = compfunc(Ntext.replace('\ ', ' '), line, Nbegidx, endidx) + data = compfunc(Ntext.replace(r'\ ', ' '), line, Nbegidx, endidx) self.completion_matches = [p[to_rm:] for p in data if len(p)>to_rm] # correct wrong splitting with '-'/"=" @@ -742,7 +742,7 @@ def path_completion(text, base_dir = None, only_dirs = False, completion += [prefix + f for f in ['.'+os.path.sep, '..'+os.path.sep] if \ f.startswith(text) and not prefix.startswith('.')] - completion = [a.replace(' ','\ ') for a in completion] + completion = [a.replace(' ',r'\ ') for a in completion] return completion @@ -1253,7 +1253,7 @@ def check_answer_in_input_file(self, question_instance, default, path=False, lin return possibility[0] if '=' in line and ' ' in line.strip(): leninit = len(line) - line,n = re.subn('\s*=\s*','=', line) + line,n = re.subn(r'\s*=\s*','=', line) if n and len(line) != leninit: return self.check_answer_in_input_file(question_instance, default, path=path, line=line) @@ -1311,7 +1311,7 @@ def nice_error_handling(self, error, line): if os.path.exists(self.debug_output): os.remove(self.debug_output) try: - super(Cmd,self).onecmd('history %s' % self.debug_output.replace(' ', '\ ')) + super(Cmd,self).onecmd('history %s' % self.debug_output.replace(' ', r'\ ')) except Exception as error: logger.error(error) @@ -2001,6 +2001,7 @@ def write_configuration(self, filepath, basefile, basedir, to_keep): text = "" has_mg5_path = False # Use local configuration => Need to update the path + already_written = set() for line in open(basefile): if '=' in line: data, value = line.split('=',1) @@ -2018,9 +2019,12 @@ def write_configuration(self, filepath, basefile, basedir, to_keep): comment = '' if key in to_keep: value = str(to_keep[key]) - else: + elif line not in already_written: + already_written.add(line) text += line continue + else: + continue if key == 'mg5_path': has_mg5_path = True try: @@ -2032,14 +2036,20 @@ def write_configuration(self, filepath, basefile, basedir, to_keep): # check if absolute path if not os.path.isabs(value): value = os.path.realpath(os.path.join(basedir, value)) - text += '%s = %s # %s \n' % (key, value, comment) + 
new_line = '%s = %s # %s \n' % (key, value, comment) + if new_line not in already_written: + text += new_line + already_written.add(new_line) for key in to_write: if key in to_keep: - text += '%s = %s \n' % (key, to_keep[key]) + new_line = '%s = %s \n' % (key, to_keep[key]) + if new_line not in already_written: + text += new_line if not MADEVENT and not has_mg5_path: - text += """\n# MG5 MAIN DIRECTORY\n""" - text += "mg5_path = %s\n" % MG5DIR + if "mg5_path = %s\n" % MG5DIR not in already_written: + text += """\n# MG5 MAIN DIRECTORY\n""" + text += "mg5_path = %s\n" % MG5DIR writer = open(filepath,'w') writer.write(text) @@ -2190,7 +2200,7 @@ def onecmd(self, line, **opt): raise def reask(self, reprint_opt=True): - pat = re.compile('\[(\d*)s to answer\]') + pat = re.compile(r'\[(\d*)s to answer\]') prev_timer = signal.alarm(0) # avoid timer if any if prev_timer: @@ -2991,7 +3001,7 @@ def question_formatting(self, nb_col = 80, lpotential_switch=0, lnb_key=0, key=None): - """should return four lines: + r"""should return four lines: 1. The upper band (typically /========\ 2. The lower band (typically \========/ 3. The line without conflict | %(nb)2d. %(descrip)-20s %(name)5s = %(switch)-10s | @@ -3239,13 +3249,13 @@ def create_question(self, help_text=True): data_to_format['conflict_switch'] = self.color_for_value(key,self.inconsistent_keys[key], consistency=False) if hidden_line: - f2 = re.sub('%(\((?:name|descrip|add_info)\)-?)(\d+)s', + f2 = re.sub(r'%(\((?:name|descrip|add_info)\)-?)(\d+)s', lambda x: '%%%s%ds' % (x.group(1),int(x.group(2))+9), f2) text.append(f2 % data_to_format) elif hidden_line: if not f3: - f3 = re.sub('%(\((?:name|descrip|add_info)\)-?)(\d+)s', + f3 = re.sub(r'%(\((?:name|descrip|add_info)\)-?)(\d+)s', lambda x: '%%%s%ds' % (x.group(1),int(x.group(2))+9), f1) text.append(f3 % data_to_format) diff --git a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/file_writers.py b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/file_writers.py index 41bff05276..526756129f 100755 --- a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/file_writers.py +++ b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/file_writers.py @@ -36,10 +36,10 @@ class FileWriter(io.FileIO): supported_preprocessor_commands = ['if'] preprocessor_command_re=re.compile( - "\s*(?P<command>%s)\s*\(\s*(?P<body>.*)\s*\)\s*{\s*"\ + r"\s*(?P<command>%s)\s*\(\s*(?P<body>.*)\s*\)\s*{\s*"\ %('|'.join(supported_preprocessor_commands))) preprocessor_endif_re=re.compile(\ - "\s*}\s*(?P<else>else)?\s*(\((?P<body>.*)\))?\s*(?P<new_block>{)?\s*") + r"\s*}\s*(?P<else>else)?\s*(\((?P<body>.*)\))?\s*(?P<new_block>{)?\s*") class FileWriterError(IOError): """Exception raised if an error occurs in the definition @@ -191,15 +191,15 @@ class FortranWriterError(FileWriter.FileWriterError): pass # Parameters defining the output of the Fortran writer - keyword_pairs = {'^if.+then\s*$': ('^endif', 2), - '^type(?!\s*\()\s*.+\s*$': ('^endtype', 2), - '^do(?!\s+\d+)\s+': ('^enddo\s*$', 2), - '^subroutine': ('^end\s*$', 0), - '^module': ('^end\s*$', 0), - 'function': ('^end\s*$', 0)} - single_indents = {'^else\s*$':-2, - '^else\s*if.+then\s*$':-2} - number_re = re.compile('^(?P<num>\d+)\s+(?P<rest>.*)') + keyword_pairs = {r'^if.+then\s*$': ('^endif', 2), + r'^type(?!\s*\()\s*.+\s*$': ('^endtype', 2), + r'^do(?!\s+\d+)\s+': (r'^enddo\s*$', 2), + '^subroutine': (r'^end\s*$', 0), + '^module': (r'^end\s*$', 0), + 'function': (r'^end\s*$', 0)} + single_indents = {r'^else\s*$':-2, + r'^else\s*if.+then\s*$':-2} + number_re = re.compile(r'^(?P<num>\d+)\s+(?P<rest>.*)') line_cont_char = '$' comment_char = 'c' uniformcase = True #force everyting to be lower/upper 
case @@ -212,7 +212,7 @@ class FortranWriterError(FileWriter.FileWriterError): # Private variables __indent = 0 __keyword_list = [] - __comment_pattern = re.compile(r"^(\s*#|c$|(c\s+([^=]|$))|cf2py|c\-\-|c\*\*|!)", re.IGNORECASE) + __comment_pattern = re.compile(r"^(\s*#|c\$|c$|(c\s+([^=]|$))|cf2py|c\-\-|c\*\*|\s*!|!\$)", re.IGNORECASE) __continuation_line = re.compile(r"(?: )[$&]") def write_line(self, line): @@ -424,26 +424,20 @@ def count_number_of_quotes(self, line): i = i + 1 return len(splitline)-1 - def remove_routine(self, text, fct_names, formatting=True): - """write the incoming text but fully removing the associate routine/function - text can be a path to a file, an iterator, a string - fct_names should be a list of functions to remove + @staticmethod + def get_routine(text, fct_names, call_back=None): + """ + get the fortran function from a fortran file """ - f77_type = ['real*8', 'integer', 'double precision', 'logical'] - pattern = re.compile('^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ + pattern = re.compile(r'^\s+(?:SUBROUTINE|(?:%(type)s)\s+function)\s+([a-zA-Z]\w*)' \ % {'type':'|'.join(f77_type)}, re.I) - + + if isinstance(text, str): + text = text.split('\n') + + to_write=False removed = [] - if isinstance(text, str): - if '\n' in text: - text = text.split('\n') - else: - text = open(text) - if isinstance(fct_names, str): - fct_names = [fct_names] - - to_write=True for line in text: fct = pattern.findall(line) if fct: @@ -451,22 +445,38 @@ def remove_routine(self, text, fct_names, formatting=True): to_write = False else: to_write = True - if to_write: - if formatting: - if line.endswith('\n'): - line = line[:-1] - self.writelines(line) - else: - if not line.endswith('\n'): - line = '%s\n' % line - super(FileWriter,self).writelines(line) + if call_back: + call_back(line) else: removed.append(line) - + return removed + + def remove_routine(self, text, fct_names, formatting=True): + """write the incoming text but fully removing the associate routine/function + text can be a path to a file, an iterator, a string + fct_names should be a list of functions to remove + """ + + def call_back(line): + if formatting: + if line.endswith('\n'): + line = line[:-1] + self.writelines(line) + else: + if not line.endswith('\n'): + line = '%s\n' % line + super(FileWriter,self).writelines(line) + + return self.get_routine(text, fct_names, call_back) + +class FortranWriter90(FortranWriter): + + comment_char = ' !' 
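As an aside, the remove_routine/get_routine split above boils down to a single-pass scanner with an optional callback; here is a minimal standalone sketch of the same pattern (simplified to a plain function on a string, with made-up sample source lines):

    import re

    f77_type = ['real*8', 'integer', 'double precision', 'logical']
    pattern = re.compile(r'^\s+(?:SUBROUTINE|(?:%s)\s+function)\s+([a-zA-Z]\w*)'
                         % '|'.join(f77_type), re.I)

    def get_routine(text, fct_names, call_back=None):
        removed, to_write = [], False
        for line in text.split('\n'):
            fct = pattern.findall(line)
            if fct:
                to_write = fct[0].lower() not in fct_names
            if to_write:
                if call_back:
                    call_back(line)       # kept line: forward to the writer
            else:
                removed.append(line)      # line belonging to a dropped routine
        return removed

    kept = []
    src = "      subroutine keep\n      end\n      subroutine drop\n      end"
    print(get_routine(src, ['drop'], kept.append))  # the two 'drop' lines
    print(kept)                                     # the two 'keep' lines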
+ #=============================================================================== # CPPWriter #=============================================================================== @@ -497,50 +507,50 @@ class CPPWriterError(FileWriter.FileWriterError): '^private': standard_indent, '^protected': standard_indent} - spacing_patterns = [ - ('\s*\"\s*}', '\"'), - ('\s*,\s*', ', '), - ('\s*-\s*', ' - '), - ('([{(,=])\s*-\s*', '\g<1> -'), - ('(return)\s*-\s*', '\g<1> -'), - ('\s*\+\s*', ' + '), - ('([{(,=])\s*\+\s*', '\g<1> +'), - ('\(\s*', '('), - ('\s*\)', ')'), - ('\{\s*', '{'), - ('\s*\}', '}'), - ('\s*=\s*', ' = '), - ('\s*>\s*', ' > '), - ('\s*<\s*', ' < '), - ('\s*!\s*', ' !'), - ('\s*/\s*', '/'), - ('\s*\*\s*', ' * '), - ('\s*-\s+-\s*', '-- '), - ('\s*\+\s+\+\s*', '++ '), - ('\s*-\s+=\s*', ' -= '), - ('\s*\+\s+=\s*', ' += '), - ('\s*\*\s+=\s*', ' *= '), - ('\s*/=\s*', ' /= '), - ('\s*>\s+>\s*', ' >> '), - ('<\s*double\s*>>\s*', ' > '), - ('\s*<\s+<\s*', ' << '), - ('\s*-\s+>\s*', '->'), - ('\s*=\s+=\s*', ' == '), - ('\s*!\s+=\s*', ' != '), - ('\s*>\s+=\s*', ' >= '), - ('\s*<\s+=\s*', ' <= '), - ('\s*&&\s*', ' && '), - ('\s*\|\|\s*', ' || '), - ('\s*{\s*}', ' {}'), - ('\s*;\s*', '; '), - (';\s*\}', ';}'), - (';\s*$}', ';'), - ('\s*<\s*([a-zA-Z0-9]+?)\s*>', '<\g<1>>'), - ('^#include\s*<\s*(.*?)\s*>', '#include <\g<1>>'), - ('(\d+\.{0,1}\d*|\.\d+)\s*[eE]\s*([+-]{0,1})\s*(\d+)', - '\g<1>e\g<2>\g<3>'), - ('\s+',' '), - ('^\s*#','#')] + spacing_patterns = [(r'\s*\"\s*}', '\"'), + (r'\s*,\s*', ', '), + (r'\s*-\s*', ' - '), + (r'([{(,=])\s*-\s*', r'\g<1> -'), + (r'(return)\s*-\s*', r'\g<1> -'), + (r'\s*\+\s*', ' + '), + (r'([{(,=])\s*\+\s*', r'\g<1> +'), + (r'\(\s*', '('), + (r'\s*\)', ')'), + (r'\{\s*', '{'), + (r'\s*\}', '}'), + (r'\s*=\s*', ' = '), + (r'\s*>\s*', ' > '), + (r'\s*<\s*', ' < '), + (r'\s*!\s*', ' !'), + (r'\s*/\s*', '/'), + (r'\s*\*\s*', ' * '), + (r'\s*-\s+-\s*', '-- '), + (r'\s*\+\s+\+\s*', '++ '), + (r'\s*-\s+=\s*', ' -= '), + (r'\s*\+\s+=\s*', ' += '), + (r'\s*\*\s+=\s*', ' *= '), + (r'\s*/=\s*', ' /= '), + (r'\s*>\s+>\s*', ' >> '), + (r'<\s*double\s*>>\s*', ' > '), + (r'\s*<\s+<\s*', ' << '), + (r'\s*-\s+>\s*', '->'), + (r'\s*=\s+=\s*', ' == '), + (r'\s*!\s+=\s*', ' != '), + (r'\s*>\s+=\s*', ' >= '), + (r'\s*<\s+=\s*', ' <= '), + (r'\s*&&\s*', ' && '), + (r'\s*\|\|\s*', ' || '), + (r'\s*{\s*}', ' {}'), + (r'\s*;\s*', '; '), + (r';\s*\}', ';}'), + (r';\s*$}', ';'), + (r'\s*<\s*([a-zA-Z0-9]+?)\s*>', r'<\g<1>>'), + (r'^#include\s*<\s*(.*?)\s*>', r'#include <\g<1>>'), + (r'(\d+\.{0,1}\d*|\.\d+)\s*[eE]\s*([+-]{0,1})\s*(\d+)', + r'\g<1>e\g<2>\g<3>'), + (r'\s+',' '), + (r'^\s*#','#')] + spacing_re = dict([(key[0], re.compile(key[0])) for key in \ spacing_patterns]) diff --git a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/gen_crossxhtml.py b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/gen_crossxhtml.py index d58ec573bc..681bf9d09b 100755 --- a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/gen_crossxhtml.py +++ b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/gen_crossxhtml.py @@ -648,7 +648,7 @@ def recreate(self, banner): if run_card['ickkw'] != 0: #parse the file to have back the information pythia_log = misc.BackRead(pjoin(path, '%s_pythia.log' % tag)) - pythiare = re.compile("\s*I\s+0 All included subprocesses\s+I\s+(?P<generated>\d+)\s+(?P<tried>\d+)\s+I\s+(?P<xsec>[\d\.D\-+]+)\s+I") + pythiare = re.compile(r"\s*I\s+0 All included subprocesses\s+I\s+(?P<generated>\d+)\s+(?P<tried>\d+)\s+I\s+(?P<xsec>[\d\.D\-+]+)\s+I") for line in pythia_log: info = pythiare.search(line) if not info: @@ -1623,7 +1623,7 @@ def get_html(self, runresults): elif self.debug: text = str(self.debug).replace('. ','.<br>
') if 'http' in text: - pat = re.compile('(http[\S]*)') + pat = re.compile(r'(http[\S]*)') text = pat.sub(r'<a href="\1"> here </a>', text) debug = '<br><br> %s <br> %s 
' % \ (self.debug.__class__.__name__, text) diff --git a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/gen_ximprove.py b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/gen_ximprove.py index c86da36a05..415ecc9de0 100755 --- a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/gen_ximprove.py +++ b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/gen_ximprove.py @@ -158,8 +158,11 @@ def get_helicity(self, to_submit=True, clean=True): (stdout, _) = p.communicate(''.encode()) stdout = stdout.decode('ascii',errors='ignore') if stdout: - nb_channel = max([math.floor(float(d)) for d in stdout.split()]) + lines = stdout.strip().split('\n') + nb_channel = max([math.floor(float(d)) for d in lines[-1].split()]) else: + if os.path.exists(pjoin(self.me_dir, 'error')): + os.remove(pjoin(self.me_dir, 'error')) for matrix_file in misc.glob('matrix*orig.f', Pdir): files.cp(matrix_file, matrix_file.replace('orig','optim')) P_zero_result.append(Pdir) @@ -297,12 +300,14 @@ def get_helicity(self, to_submit=True, clean=True): bad_amps_perhel = [] if __debug__: mtext = open(matrix_file).read() - nb_amp = int(re.findall('PARAMETER \(NGRAPHS=(\d+)\)', mtext)[0]) - logger.debug('nb_hel: %s zero amp: %s bad_amps_hel: %s/%s', len(good_hels),len(bad_amps),len(bad_amps_perhel), len(good_hels)*nb_amp ) + nb_amp = int(re.findall(r'PARAMETER \(NGRAPHS=(\d+)\)', mtext)[0]) + logger.debug('(%s) nb_hel: %s zero amp: %s bad_amps_hel: %s/%s', split_file[-1], len(good_hels),len(bad_amps),len(bad_amps_perhel), len(good_hels)*nb_amp ) if len(good_hels) == 1: files.cp(matrix_file, matrix_file.replace('orig','optim')) continue # avoid optimization if onlye one helicity - recycler = hel_recycle.HelicityRecycler(good_hels, bad_amps, bad_amps_perhel) + + gauge = self.cmd.proc_characteristics['gauge'] + recycler = hel_recycle.HelicityRecycler(good_hels, bad_amps, bad_amps_perhel, gauge=gauge) # In case of bugs you can play around with these: recycler.hel_filt = self.run_card['hel_filtering'] recycler.amp_splt = self.run_card['hel_splitamp'] @@ -1299,7 +1304,7 @@ def get_job_for_event(self): 'script_name': 'unknown', 'directory': C.name, # need to be change for splitted job 'P_dir': C.parent_name, - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # needed for RO gridpack 'offset': 1, # need to be change for splitted job 'nevents': nevents, 'maxiter': self.max_iter, @@ -1387,7 +1392,7 @@ def create_ajob(self, template, jobs, write_dir=None): break info = jobs[j] info['script_name'] = 'ajob%i' % script_number - info['keeplog'] = 'false' + info['keeplog'] = 'false' if self.run_card['keep_log'] != 'debug' else 'true' if "base_directory" not in info: info["base_directory"] = "./" fsock.write(template_text % info) @@ -1456,7 +1461,7 @@ def get_job_for_precision(self): 'script_name': 'unknown', 'directory': C.name, # need to be change for splitted job 'P_dir': C.parent_name, - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # used for RO gridpack 'offset': 1, # need to be change for splitted job 'nevents': nevents, 'maxiter': self.max_iter, @@ -1916,7 +1921,7 @@ def get_job_for_event(self): 'directory': C.name, # need to be change for splitted job 'P_dir': os.path.basename(C.parent_name), 'offset': 1, # need to be change for splitted job - #'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), + 'Ppath': pjoin(self.cmd.me_dir, 'SubProcesses', C.parent_name), # use for RO gridpack 'nevents': 
nevents, #int(nevents*self.gen_events_security)+1, 'maxiter': self.max_iter, 'miniter': self.min_iter, diff --git a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/hel_recycle.py b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/hel_recycle.py index dfa45d5d20..6c86611f68 100755 --- a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/hel_recycle.py +++ b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/hel_recycle.py @@ -383,7 +383,7 @@ def get_number(cls, *args): class HelicityRecycler(): '''Class for recycling helicity''' - def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[]): + def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[], gauge='U'): External.good_hel = [] External.nhel_lines = '' @@ -427,6 +427,7 @@ def __init__(self, good_elements, bad_amps=[], bad_amps_perhel=[]): self.all_hel = [] self.hel_filt = True + self.gauge = gauge def set_input(self, file): if 'born_matrix' in file: @@ -612,7 +613,7 @@ def unfold_helicities(self, line, nature): def apply_amps(self, line, new_objs): if self.amp_splt: - return split_amps(line, new_objs) + return split_amps(line, new_objs, gauge=self.gauge) else: return apply_args(line, [i.args for i in new_objs]) @@ -785,7 +786,7 @@ def apply_args(old_line, all_the_args): return ''.join(new_lines) -def split_amps(line, new_amps): +def split_amps(line, new_amps, gauge): if not new_amps: return '' fct = line.split('(',1)[0].split('_0')[0] @@ -841,34 +842,31 @@ def split_amps(line, new_amps): spin = fct.split(None,1)[1][to_remove] lines.append('%sP1N_%s(%s)' % (fct, to_remove+1, ', '.join(args))) - hel, iamp = re.findall('AMP\((\d+),(\d+)\)', amp_result)[0] + hel, iamp = re.findall(r'AMP\((\d+),(\d+)\)', amp_result)[0] hel_calculated.append(hel) #lines.append(' %(result)s = TMP(3) * W(3,%(w)s) + TMP(4) * W(4,%(w)s)+' # % {'result': amp_result, 'w': windex}) #lines.append(' & TMP(5) * W(5,%(w)s)+TMP(6) * W(6,%(w)s)' # % {'result': amp_result, 'w': windex}) - if spin in "VF": - lines.append(""" call CombineAmp(%(nb)i, - & (/%(hel_list)s/), - & (/%(w_list)s/), - & TMP, W, AMP(1,%(iamp)s))""" % - {'nb': len(sub_amps), - 'hel_list': ','.join(hel_calculated), - 'w_list': ','.join(windices), - 'iamp': iamp - }) + if spin == "F" or ( spin == "V" and gauge !='FD'): + suffix = '' elif spin == "S": - lines.append(""" call CombineAmpS(%(nb)i, - &(/%(hel_list)s/), - & (/%(w_list)s/), - & TMP, W, AMP(1,%(iamp)s))""" % - {'nb': len(sub_amps), - 'hel_list': ','.join(hel_calculated), - 'w_list': ','.join(windices), - 'iamp': iamp - }) + suffix = 'S' + elif spin == "V" and gauge == "FD": + suffix = "FD" else: - raise Exception("split amp are not supported for spin2 and 3/2") + raise Exception("split amp not supported for spin2, 3/2") + + lines.append(""" call CombineAmp%(suffix)s(%(nb)i, + & (/%(hel_list)s/), + & (/%(w_list)s/), + & TMP, W, AMP(1,%(iamp)s))""" % {'suffix':suffix, + 'nb': len(sub_amps), + 'hel_list': ','.join(hel_calculated), + 'w_list': ','.join(windices), + 'iamp': iamp + }) + #lines.append('') return '\n'.join(lines) diff --git a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/histograms.py b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/histograms.py index 98f22c4c3a..9931127f66 100755 --- a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/histograms.py +++ b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/histograms.py @@ -632,34 +632,34 @@ class HwU(Histogram): # than necessary because the HwU standard allows for spaces from within # the name of a weight weight_header_re = re.compile( - '&\s*(?P<wgt_name>(\S|(\s(?!\s*(&|$))))+)(\s(?!(&|$)))*') + r'&\s*(?P<wgt_name>(\S|(\s(?!\s*(&|$))))+)(\s(?!(&|$)))*') # ================================ # Histo weight specification RE's # ================================ # The start of a plot - histo_start_re = re.compile('^\s*<histogram>\s*(?P<n_bins>\d+)\s*"\s*'+ - '(?P<histo_name>(\S|(\s(?!\s*")))+)\s*"\s*$') + histo_start_re = re.compile(r'^\s*<histogram>\s*(?P<n_bins>\d+)\s*"\s*'+ + r'(?P<histo_name>(\S|(\s(?!\s*")))+)\s*"\s*$') # A given weight specifier - a_float_re = '[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' - histo_bin_weight_re = re.compile('(?P<weight>%s|NaN)'%a_float_re,re.IGNORECASE) - a_int_re = '[\+|-]?\d+' + a_float_re = r'[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' + histo_bin_weight_re = re.compile(r'(?P<weight>%s|NaN)'%a_float_re,re.IGNORECASE) + a_int_re = r'[\+|-]?\d+' # The end of a plot histo_end_re = re.compile(r'^\s*<\\histogram>\s*$') # A scale type of weight - weight_label_scale = re.compile('^\s*mur\s*=\s*(?P<mur>%s)'%a_float_re+\ - '\s*muf\s*=\s*(?P<muf>%s)\s*$'%a_float_re,re.IGNORECASE) - weight_label_PDF = re.compile('^\s*PDF\s*=\s*(?P<PDF>\d+)\s*$') - weight_label_PDF_XML = re.compile('^\s*pdfset\s*=\s*(?P<PDF>\d+)\s*$') - weight_label_TMS = re.compile('^\s*TMS\s*=\s*(?P<TMS>%s)\s*$'%a_float_re) - weight_label_alpsfact = re.compile('^\s*alpsfact\s*=\s*(?P<alpsfact>%s)\s*$'%a_float_re, re.IGNORECASE) - weight_label_scale_adv = re.compile('^\s*dyn\s*=\s*(?P<dyn>%s)'%a_int_re+\ - '\s*mur\s*=\s*(?P<mur>%s)'%a_float_re+\ - '\s*muf\s*=\s*(?P<muf>%s)\s*$'%a_float_re,re.IGNORECASE) - weight_label_PDF_adv = re.compile('^\s*PDF\s*=\s*(?P<PDF>\d+)\s+(?P<set_name>\S+)\s*$') + weight_label_scale = re.compile(r'^\s*mur\s*=\s*(?P<mur>%s)'%a_float_re+\ + r'\s*muf\s*=\s*(?P<muf>%s)\s*$'%a_float_re,re.IGNORECASE) + weight_label_PDF = re.compile(r'^\s*PDF\s*=\s*(?P<PDF>\d+)\s*$') + weight_label_PDF_XML = re.compile(r'^\s*pdfset\s*=\s*(?P<PDF>\d+)\s*$') + weight_label_TMS = re.compile(r'^\s*TMS\s*=\s*(?P<TMS>%s)\s*$'%a_float_re) + weight_label_alpsfact = re.compile(r'^\s*alpsfact\s*=\s*(?P<alpsfact>%s)\s*$'%a_float_re, re.IGNORECASE) + weight_label_scale_adv = re.compile(r'^\s*dyn\s*=\s*(?P<dyn>%s)'%a_int_re+\ + r'\s*mur\s*=\s*(?P<mur>%s)'%a_float_re+\ + r'\s*muf\s*=\s*(?P<muf>%s)\s*$'%a_float_re,re.IGNORECASE) + weight_label_PDF_adv = re.compile(r'^\s*PDF\s*=\s*(?P<PDF>\d+)\s+(?P<set_name>\S+)\s*$') class ParseError(MadGraph5Error): @@ -926,7 +926,7 @@ def get_HwU_source(self, print_header=True): res.append(' '.join('%+16.7e'%wgt for wgt in list(bin.boundaries))) res[-1] += ' '.join('%+16.7e'%bin.wgts[key] for key in self.bins.weight_labels if key not in ['central','stat_error']) - res.append('<\histogram>') + res.append(r'<\histogram>') return res def output(self, path=None, format='HwU', print_header=True): @@ -1149,6 +1149,8 @@ def parse_one_histo_from_stream(self, stream, all_weight_header, boundaries = [0.0,0.0] for j, weight in \ enumerate(HwU.histo_bin_weight_re.finditer(line_bin)): + if (j == len(weight_header)): + continue if j == len(all_weight_header): raise HwU.ParseError("There is more bin weights"+\ " specified than expected (%i)"%len(weight_header)) @@ -1803,7 +1805,7 @@ def parse_histos_from_PY8_XML_stream(self, stream, run_id=None, # Filter empty weights coming from the split weight_label_list = [wgt.strip() for wgt in str(selected_run_node.getAttribute('header')).split(';') if - not re.match('^\s*$',wgt)] + not re.match(r'^\s*$',wgt)] ordered_weight_label_list = [w for w in weight_label_list if w not\ in ['xmin','xmax']] # Remove potential repetition of identical weight labels @@ -1827,7 +1829,7 @@ def parse_histos_from_PY8_XML_stream(self, stream, run_id=None, all_weights = [] for wgt_position, wgt_label in \ enumerate(str(selected_run_node.getAttribute('header')).split(';')): - if not 
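As an aside, these weight-label patterns can be exercised directly; a minimal standalone check on a typical HwU scale label (using the group names as defined in the pattern definitions above):

    import re

    a_float_re = r'[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?'
    weight_label_scale = re.compile(r'^\s*mur\s*=\s*(?P<mur>%s)' % a_float_re +
                                    r'\s*muf\s*=\s*(?P<muf>%s)\s*$' % a_float_re,
                                    re.IGNORECASE)
    m = weight_label_scale.match('mur=0.5 muf=2.0')
    print(m.group('mur'), m.group('muf'))  # -> 0.5 2.0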
re.match('^\s*$',wgt_label) is None: + if not re.match(r'^\s*$',wgt_label) is None: continue all_weights.append({'POSITION':wgt_position}) for wgt_item in wgt_label.strip().split('_'): @@ -2714,7 +2716,7 @@ def ratio_no_correlations(wgtsA, wgtsB): # First the global gnuplot header for this histogram group global_header =\ -""" +r""" ################################################################################ ### Rendering of the plot titled '%(title)s' ################################################################################ @@ -2862,9 +2864,9 @@ def ratio_no_correlations(wgtsA, wgtsB): major_title = ', '.join(major_title) if not mu[0] in ['none',None]: - major_title += ', dynamical\_scale\_choice=%s'%mu[0] + major_title += r', dynamical\_scale\_choice=%s'%mu[0] if not pdf[0] in ['none',None]: - major_title += ', PDF=%s'%pdf[0].replace('_','\_') + major_title += ', PDF=%s'%pdf[0].replace('_',r'\_') # Do not show uncertainties for individual jet samples (unless first # or specified explicitely and uniquely) @@ -2937,7 +2939,7 @@ def ratio_no_correlations(wgtsA, wgtsB): plot_lines.append( "'%s' index %d using (($1+$2)/2):%d ls %d title '%s'"\ %(HwU_name,block_position+i,mu_var+3,color_index,\ -'%s dynamical\_scale\_choice=%s' % (title,mu[j]))) +r'%s dynamical\_scale\_choice=%s' % (title,mu[j]))) # And now PDF_variation if available if not PDF_var_pos is None: for j,PDF_var in enumerate(PDF_var_pos): @@ -2947,7 +2949,7 @@ def ratio_no_correlations(wgtsA, wgtsB): plot_lines.append( "'%s' index %d using (($1+$2)/2):%d ls %d title '%s'"\ %(HwU_name,block_position+i,PDF_var+3,color_index,\ -'%s PDF=%s' % (title,pdf[j].replace('_','\_')))) +'%s PDF=%s' % (title,pdf[j].replace('_',r'\_')))) # Now add the uncertainty lines, those not using a band so that they # are not covered by those using a band after we reverse plo_lines diff --git a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/launch_plugin.py b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/launch_plugin.py index 7b10bedcef..9ec09eb71d 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/launch_plugin.py +++ b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/launch_plugin.py @@ -98,6 +98,7 @@ def default_setup(self): self['vector_size'] = 16 # already setup in default class (just change value) self['aloha_flag'] = '--fast-math' self['matrix_flag'] = '-O3' + self['limhel'] = 0 self.display_block.append('simd') self.display_block.append('psoptim') diff --git a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/lhe_parser.py b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/lhe_parser.py index eee9ba4522..f6e47956cd 100755 --- a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/lhe_parser.py +++ b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/lhe_parser.py @@ -7,6 +7,7 @@ import numbers import math import time +import copy import os import shutil import sys @@ -101,7 +102,7 @@ def __init__(self, line=None, event=None): self.rwgt = 0 return - + self.event = event if event is not None: self.event_id = len(event) #not yet in the event @@ -1066,6 +1067,8 @@ def define_init_banner(self, wgt, lha_strategy, proc_charac=None): #special case for 1>N init_information = run_card.get_banner_init_information() event = next(self) + if not len(event): #if parse-momenta was false we have to parse the first event + event = Event(str(event)) init_information["idbmup1"] = event[0].pdg init_information["ebmup1"] = event[0].mass init_information["idbmup2"] = 0 @@ -1149,6 +1152,7 @@ def initialize_unweighting(self, getwgt, trunc_error): nb_keep = max(20, int(nb_event*trunc_error*15)) 
new_wgt = new_wgt[-nb_keep:] if nb_event == 0: + misc.sprint(i,f) raise Exception # store the information self.initial_nb_events[i] = nb_event @@ -1203,7 +1207,6 @@ def unweight(self, outputpath, get_wgt, **opts): (stop to write event when target is reached) """ - if isinstance(get_wgt, (str,six.text_type)): unwgt_name =get_wgt def get_wgt_multi(event): @@ -1483,12 +1486,12 @@ def reorder_mother_child(self): particle.mother2 -= 1 # re-call the function for the next potential change return self.reorder_mother_child() - - - - - + + + + + def parse_reweight(self): """Parse the re-weight information in order to return a dictionary {key: value}. If no group is define group should be '' """ @@ -1522,6 +1525,37 @@ def parse_nlo_weight(self, real_type=(1,11), threshold=None): threshold=threshold) return self.nloweight + def get_fks_pair(self, real_type=(1,11), threshold=None): + """ Gives the fks pair labels""" + start, stop = self.tag.find(''), self.tag.find('') + if start != -1 != stop: + text = self.tag[start+8:stop] + all_line = text.split('\n') + text = text.lower().replace('d','e') + all_line = text.split('\n') + for line in all_line: + data = line.split() + if len(data)>16: + wgt = OneNLOWeight(line, real_type=real_type) + return wgt.to_merge_pdg,wgt.nexternal + + def get_born_momenta(self,real_type=(1,11), threshold=None): + """ Gets the underlying n+1 body kinematics""" + start, stop = self.tag.find(''), self.tag.find('') + if start != -1 != stop: + text = self.tag[start+8:stop] + text = text.lower().replace('d','e') + all_line = text.split('\n') + for line in all_line: + data = line.split() + if len(data)>16: + wgt = OneNLOWeight(line, real_type=real_type) + nexternal = wgt.nexternal + real_momenta = all_line[2:2+nexternal] + return real_momenta + + + def rewrite_nlo_weight(self, wgt=None): """get the string associate to the weight""" @@ -1558,11 +1592,11 @@ def parse_lo_weight(self): return self.loweight if not hasattr(Event, 'loweight_pattern'): - Event.loweight_pattern = re.compile('''\s*(?P\d+)\s+(?P[\d.e+-]+)\s*\s*\n\s* - \s*(?P[\s\d.+-e]+)\s*\s*\n\s* - 1|2)["']?\>\s*(?P[\s\d.e+-]*)\s*\s*\n\s* - 1|2)["']?\>\s*(?P[\s\d.e+-]*)\s*\s*\n\s* - \s*(?P[\d.e+-]*)\s* + Event.loweight_pattern = re.compile('''\\s*(?P\\d+)\\s+(?P[\\d.e+-]+)\\s*\\s*\n\\s* + \\s*(?P[\\s\\d.+-e]+)\\s*\\s*\n\\s* + 1|2)["']?\\>\\s*(?P[\\s\\d.e+-]*)\\s*\\s*\n\\s* + 1|2)["']?\\>\\s*(?P[\\s\\d.e+-]*)\\s*\\s*\n\\s* + \\s*(?P[\\d.e+-]*)\\s* ''',re.X+re.I+re.M) start, stop = self.tag.find(''), self.tag.find('') @@ -1615,7 +1649,7 @@ def parse_matching_scale(self): self.matched_scale_data = [] - pattern = re.compile("") + pattern = re.compile(r"") data = re.split(pattern,self.tag) if len(data) == 1: return [] @@ -1623,7 +1657,7 @@ def parse_matching_scale(self): tmp = {} start,content, end = data self.tag = "%s%s" % (start, end) - pattern = re.compile("pt_clust_(\d*)=\"([\de+-.]*)\"") + pattern = re.compile("pt_clust_(\\d*)=\"([\\de+-.]*)\"") for id,value in pattern.findall(content): tmp[int(id)] = float(value) for i in range(1, len(self)+1): @@ -1647,7 +1681,7 @@ def parse_syscalc_info(self): return self.syscalc_data pattern = re.compile("|") - pattern2 = re.compile("<(?P[\w]*)(?:\s*(\w*)=[\"'](.*)[\"']\s*|\s*)>(.*)") + pattern2 = re.compile("<(?P[\\w]*)(?:\\s*(\\w*)=[\"'](.*)[\"']\\s*|\\s*)>(.*)") data = re.split(pattern,self.tag) if len(data) == 1: return [] @@ -1850,6 +1884,240 @@ def get_decay(self, pdg_code=0, event_id=None): return new_event + + def set_initial_mass_to_zero(self): + """set the masses of the initial 
particles to zero, by reshuffling the respective momenta + Works only in the **partonic** com frame, so the event must be boosted to such frame + before calling the function + """ + + if not misc.equal(self[0].px, 0) or not misc.equal(self[1].px, 0) or \ + not misc.equal(self[0].py, 0) or not misc.equal(self[1].py, 0) or \ + not misc.equal(self[0].pz, - self[1].pz, zero_limit=False): + misc.sprint(self[0]) + misc.sprint(self[1]) + raise Exception('momenta should be in the partonic center of mass frame') + + self[0].mass = 0. + self[1].mass = 0. + tot_E=0. + for ip,part in enumerate(self): + if part.status == 1 : + tot_E += part.E + if (self[0].pz > 0. and self[1].pz < 0): + self[0].set_momentum(FourMomentum([tot_E/2., 0., 0., tot_E/2.])) + self[1].set_momentum(FourMomentum([tot_E/2., 0., 0., -tot_E/2.])) + elif (self[0].pz < 0. and self[1].pz > 0): + self[0].set_momentum(FourMomentum([tot_E/2., 0., 0., -tot_E/2.])) + self[1].set_momentum(FourMomentum([tot_E/2., 0., 0., tot_E/2.])) + else: + logger.critical('ERROR: two incoming partons not back.-to-back') + + def set_final_jet_mass_to_zero(self): + """set the final light particle masses to zero + """ + + for ip,part in enumerate(self): + if ((abs(part.pid) <= 5) or (abs(part.pid) == 11) or (abs(part.pid) == 12)) and (part.status == 1): + part.mass = 0. + E_1_new = math.sqrt(part.mass**2 + part.px**2 + part.py**2 + part.pz**2) + part.set_momentum(FourMomentum([E_1_new, part.px, part.py, part.pz])) + + + + def merge_particles_kinematics(self, i,j, moth): + """Map to an underlying n-body kinematics for two given + particles i,j to be merged and a resulting moth""" + """ note! kinematics (and id) mapping only! """ + + recoil = True + fks_type = False + + if recoil and not fks_type: + if (i == moth[0].get('number')-1): + fks_i = i + fks_j = j + elif (j == moth[0].get('number')-1): + fks_i = j + fks_j = i + to_remove = fks_j + + merge_i = self[fks_i] + merge_j = self[fks_j] + + i_4mom = FourMomentum(merge_i) + j_4mom = FourMomentum(merge_j) + if (fks_i <= 1): + sign1 = -1.0 + else: + sign1 = 1.0 + mother_4mom = i_4mom + sign1*j_4mom + + new_event = copy.deepcopy(self) + + self[fks_i].pid = moth[0]['id'] + self[fks_i].set_momentum(mother_4mom) + + if fks_i <= 1: # initial-state recoil + new_p = FourMomentum() + for ip,part in enumerate(self): + if (ip != fks_i and ip != fks_j and ip >= 2): + new_p += part + + if fks_i == 0: + self[1].set_momentum(new_p - FourMomentum(self[0])) + elif fks_i == 1: + self[0].set_momentum(new_p - FourMomentum(self[1])) + + pz_1_new = self.recoil_eq(self[0],self[1]) + pz_2_new = self[0].pz + self[1].pz - pz_1_new + E_1_new = math.sqrt(self[0].mass**2 + self[0].px**2 + self[0].py**2 + pz_1_new **2) + E_2_new = math.sqrt(self[1].mass**2 + self[1].px**2 + self[1].py**2 + pz_2_new **2) + self[0].set_momentum(FourMomentum([E_1_new,self[0].px,self[0].py,pz_1_new])) + self[1].set_momentum(FourMomentum([E_2_new,self[1].px,self[1].py,pz_2_new])) + self.pop(to_remove) + + if fks_i > 1: # final-state recoil + + # Re-scale the energy of fks_i to make it on-shell + for ip,part in enumerate(self): + if (ip == fks_i): + part.E = math.sqrt(part.mass**2 + part.px**2 + part.py**2 + part.pz**2) + new_p.E = part.E + + # Find the overall energy in the final state + new_p.E = 0.0 + for ip,part in enumerate(self): + if (ip != fks_j and ip >= 2): + new_p.E += part.E + + # Use one of the initial states to absorb the energy change in the final state + self[1].set_momentum(FourMomentum([new_p.E-self[0].E,self[1].px,self[1].py,self[1].pz])) + + # 
Change the initial state pz and E + pz_1_new = self.recoil_eq(self[0],self[1]) + pz_2_new = self[0].pz + self[1].pz - pz_1_new + E_1_new = math.sqrt(self[0].mass**2 + self[0].px**2 + self[0].py**2 + pz_1_new **2) + E_2_new = math.sqrt(self[1].mass**2 + self[1].px**2 + self[1].py**2 + pz_2_new **2) + self[0].set_momentum(FourMomentum([E_1_new,self[0].px,self[0].py,pz_1_new])) + self[1].set_momentum(FourMomentum([E_2_new,self[1].px,self[1].py,pz_2_new])) + self.pop(to_remove) + + elif fks_type and not recoil: + ## Do it in a more FKS-style + if (i == moth[0].get('number')-1): + fks_i = i + fks_j = j + elif (j == moth[0].get('number')-1): + fks_i = j + fks_j = i + to_remove = fks_j + new_event = copy.copy(event) + + if fks_i <= 1: # initial-state recoil + + # First boost to partonic CM frame + q = FourMomentum(self[0])+FourMomentum(self[1]) + for ip,part in enumerate(self): + vec = FourMomentum(part) + self[ip].set_momentum(vec.zboost(pboost=q)) + + k_tot = FourMomentum([self[0].E+self[1].E-self[fks_j].E,self[0].px+self[1].px-self[fks_j].px,\ + self[0].py+self[1].py-self[fks_j].py,self[0].pz+self[1].pz-self[fks_j].pz]) + + final = FourMomentum([0,0,0,0]) + for ip,part in enumerate(self): + vec = FourMomentum([part.E,part.px,part.py,part.pz]) + if (ip != fks_i and ip != fks_j and ip >= 2): + final = final + vec + + s = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz])**2 + ksi = self[fks_j].E/(math.sqrt(s)/2.0) + y = self[fks_j].pz/self[fks_j].E + + self[0].pz = self[0].pz * math.sqrt(1.0-ksi)*math.sqrt((2.0-ksi*(1.0+y))/((2.0-ksi*(1.0-y)))) + self[0].E = math.sqrt(self[0].mass**2 + self[0].pz**2) + self[1].pz = self[1].pz * math.sqrt(1.0-ksi)*math.sqrt((2.0-ksi*(1.0-y))/((2.0-ksi*(1.0+y)))) + self[1].E = math.sqrt(self[1].mass**2 + self[1].pz**2) + + final = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz]) + + k_tot_1 = k_tot.zboost(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + k_tot_2 = k_tot_1.pt_boost(pboost=FourMomentum([k_tot_1.E,k_tot_1.px,k_tot_1.py,k_tot_1.pz])) + k_tot_3 = k_tot_2.zboost_inv(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + + for ip,part in enumerate(self): + if (ip >= 2): + vec = FourMomentum([part.E,part.px,part.py,part.pz]) + vec2 = vec.zboost(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + vec3 = vec2.pt_boost(pboost=FourMomentum([k_tot_1.E,k_tot_1.px,k_tot_1.py,k_tot_1.pz])) + vec_new = vec3.zboost_inv(pboost=FourMomentum([k_tot.E,k_tot.px,k_tot.py,k_tot.pz])) + self[ip].set_momentum(FourMomentum([vec_new.E,vec_new.px,vec_new.py,vec_new.pz])) + + self.pop(to_remove) + + else: # final-state recoil + q = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz]) + + for ip,part in enumerate(self): + vec = FourMomentum([part.E,part.px,part.py,part.pz]) + self[ip].set_momentum(vec.zboost(pboost=q)) + + q = FourMomentum([self[0].E+self[1].E,self[0].px+self[1].px,\ + self[0].py+self[1].py,self[0].pz+self[1].pz]) + + k = FourMomentum([self[fks_i].E+self[fks_j].E,self[fks_i].px+self[fks_j].px,\ + self[fks_i].py+self[fks_j].py,self[fks_i].pz+self[fks_j].pz]) + + k_rec = FourMomentum([0,0,0,0]) + for ip,part in enumerate(self): + if ip >= 2 and ip != fks_i and ip != fks_j: # add only final-states to the recoil and not the FKS pair + k_rec = k_rec + FourMomentum([part.E,part.px,part.py,part.pz]) + + k_mom = math.sqrt(k_rec.px**2 + k_rec.py**2 + k_rec.pz**2) + beta = (q**2 - 
(k_rec.E+k_mom)**2)/(q**2 + (k_rec.E+k_mom)**2) + for ip,part in enumerate(self): + if ip >= 2 and ip != fks_i and ip != fks_j: + vec = FourMomentum([self[ip].E,self[ip].px,self[ip].py,self[ip].pz]) + self[ip].set_momentum(vec.boost_beta(beta,k_rec)) + if ip == fks_i: + self[ip].set_momentum(q - k_rec.boost_beta(beta,k_rec)) + self.pop(to_remove) + else: + logger.info('Error in Sudakov Born mapping: no recoil scheme found!') + + def recoil_eq(self,part1, part2): + """ In general, solves the equation + E1 + E2 = K + p1 + p2 = c + E1^2 - p1^2 = a + E2^2 - p2^2 = b + and returns p1 + """ + thresh = 1e-6 + import random + a = part1.mass**2 + part1.px**2 + part1.py**2 + b = part2.mass**2 + part2.px**2 + part2.py**2 + c = part1.pz + part2.pz + K = part1.E + part2.E + K2 = K**2 + sol1 = (-a*c + b*c + c**3 - c*K2 - math.sqrt(K2*(a**2 + (b + c**2 - K2)**2 - 2*a*(b - c**2 + K2))))/(2*(c**2-K2)) + sol2 = (-a*c + b*c + c**3 - c*K2 + math.sqrt(K2*(a**2 + (b + c**2 - K2)**2 - 2*a*(b - c**2 + K2))))/(2*(c**2-K2)) + + if abs(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2) - (math.sqrt(a+sol2**2) + math.sqrt(b+(c-sol2)**2))) > thresh: + logger.critical('Error in recoil_eq solver 1') + logger.critical(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2)) + logger.critical(math.sqrt(a+sol2**2) + math.sqrt(b+(c-sol2)**2)) + if abs(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2) - K) > thresh: + logger.critical('Error in recoil_eq solver 2') + logger.critical(math.sqrt(a+sol1**2) + math.sqrt(b+(c-sol1)**2)) + logger.critical(K) + return sol1 + + def boost(self, filter=None): """modify the current event to boost it according to the current filter""" if filter is None: @@ -1861,7 +2129,7 @@ def boost(self, filter=None): if list(filter(p)): pboost += p else: - pboost = FourMomentum(pboost) + pboost = FourMomentum(filter) # change sign of three-component due to helas convention pboost.px *=-1 @@ -1877,7 +2145,7 @@ def check(self): """check various property of the events""" # check that relative error is under control - threshold = 1e-6 + threshold = 1e-4 #1. Check that the 4-momenta are conserved E, px, py, pz = 0,0,0,0 @@ -1920,7 +2188,50 @@ def check(self): self.check_color_structure() #3. check mass - + + def check_kinematics_only(self): + """check various property of the events - only kinematics""" + + # check that relative error is under control + threshold = 1e-3 + + #1. 
Check that the 4-momenta are conserved + E, px, py, pz = 0,0,0,0 + absE, abspx, abspy, abspz = 0,0,0,0 + for particle in self: + coeff = 1 + if particle.status == -1: + coeff = -1 + elif particle.status != 1: + continue + E += coeff * particle.E + absE += abs(particle.E) + px += coeff * particle.px + py += coeff * particle.py + pz += coeff * particle.pz + abspx += abs(particle.px) + abspy += abs(particle.py) + abspz += abs(particle.pz) + # check mass + fourmass = FourMomentum(particle).mass + + if particle.mass and (abs(particle.mass) - fourmass)/ abs(particle.mass) > threshold: + logger.critical(self) + raise Exception( "Do not have correct mass lhe: %s momentum: %s (error at %s" % (particle.mass, fourmass, (abs(particle.mass) - fourmass)/ abs(particle.mass))) + + if abs(E/absE) > threshold: + logger.critical(self) + raise Exception("Do not conserve Energy %s, %s" % (E/absE, E)) + if abs(px/abspx) > threshold: + logger.critical(self) + raise Exception("Do not conserve Px %s, %s" % (px/abspx, px)) + if abs(py/abspy) > threshold: + logger.critical(self) + raise Exception("Do not conserve Py %s, %s" % (py/abspy, py)) + if abs(pz/abspz) > threshold: + logger.critical(self) + raise Exception("Do not conserve Pz %s, %s" % (pz/abspz, pz)) + def assign_scale_line(self, line, convert=True): """read the line corresponding to global event line @@ -2764,7 +3075,7 @@ def zboost(self, pboost=None, E=0, pz=0): if isinstance(pboost, FourMomentum): E = pboost.E pz = pboost.pz - + #beta = pz/E gamma = E / math.sqrt(E**2-pz**2) gammabeta = pz / math.sqrt(E**2-pz**2) @@ -2778,6 +3089,74 @@ def zboost(self, pboost=None, E=0, pz=0): out.pz = 0 return out + def zboost_inv(self, pboost=None, E=0, pz=0): + """Both momenta should be in the same frame. + The boost perform correspond to the boost required to set pboost at + rest (only z boost applied). + """ + if isinstance(pboost, FourMomentum): + E = pboost.E + pz = pboost.pz + + #beta = pz/E + gamma = E / math.sqrt(E**2-pz**2) + gammabeta = pz / math.sqrt(E**2-pz**2) + + out = FourMomentum([gamma*self.E + gammabeta*self.pz, + self.px, + self.py, + gamma*self.pz + gammabeta*self.E]) + + if abs(out.pz) < 1e-6 * out.E: + out.pz = 0 + return out + + + def pt_boost(self, pboost=None, E=0, pz=0): + """Both momenta should be in the same frame. + The boost perform correspond to the boost required to set pboost at + rest (only pT boost applied). 
+ """ + + if isinstance(pboost, FourMomentum): + E = pboost.E + px = pboost.px + py = pboost.py + mass = math.sqrt(E**2 - px**2 - py**2) + + betax = px/E + betay = py/E + beta = math.sqrt(betax**2+betay**2) + gamma = 1 / math.sqrt(1.0-beta**2) + + out = FourMomentum([gamma*self.E - gamma*betax*self.px - gamma*betay*self.py, + -gamma*betax*self.E + (1.0 + (gamma-1.0)*betax**2/(beta**2))*self.px + (gamma-1.0)*betax*betay/(beta**2)*self.py, + -gamma*betay*self.E + ((gamma-1.0)*betax*betay/(beta**2))*self.px + (1.0+(gamma-1.0)*(betay**2)/(beta**2))*self.py, + self.pz]) + + if abs(out.px) < 1e-6 * out.E: + out.px = 0 + if abs(out.py) < 1e-6 * out.E: + out.py = 0 + return out + + def boost_beta(self,beta,mom): + """ Boost along the three-momentum of mom with a boost of size beta""" + + unit = mom * (1.0/math.sqrt(mom.px**2+mom.py**2+mom.pz**2)) + beta_vec = beta*unit + bx = beta_vec.px + by = beta_vec.py + bz = beta_vec.pz + gamma = 1.0 / math.sqrt(1.0-beta**2) + + out = FourMomentum([gamma*self.E - gamma*bx*self.px - gamma*by*self.py - gamma*bz*self.pz, + -gamma*bx*self.E + (1.0 + (gamma-1.0)*bx**2/(beta**2))*self.px + (gamma-1.0)*bx*by/(beta**2)*self.py + (gamma-1.0)*bx*bz/(beta**2)*self.pz, + -gamma*by*self.E + ((gamma-1.0)*bx*by/(beta**2))*self.px + (1.0+(gamma-1.0)*(by**2)/(beta**2))*self.py + (gamma-1.0)*by*bz/(beta**2)*self.pz, + -gamma*bz*self.E + (gamma-1.0)*bx*bz/(beta**2)*self.px + (gamma-1.0)*(by*bz)/(beta**2)*self.py + (1.0+(gamma-1.0)*bz**2/(beta**2))*self.pz]) + + return out + def boost_to_restframe(self, pboost): """apply the boost transformation such that pboost is at rest in the new frame. First apply a rotation to allign the pboost to the z axis and then use @@ -2789,27 +3168,64 @@ def boost_to_restframe(self, pboost): return out - # write pboost as (E, p cosT sinF, p sinT sinF, p cosF) - # rotation such that it become (E, 0 , 0 , p ) is - # cosT sinF , -sinT , cosT sinF - # sinT cosF , cosT , sinT sinF - # -sinT , 0 , cosF - p = math.sqrt( pboost.px**2 + pboost.py**2+ pboost.pz**2) - cosF = pboost.pz / p - sinF = math.sqrt(1-cosF**2) - sinT = pboost.py/p/sinF - cosT = pboost.px/p/sinF - - out=FourMomentum([self.E, - self.px*cosT*cosF + self.py*sinT*cosF-self.pz*sinF, - -self.px*sinT+ self.py*cosT, - self.px*cosT*sinF + self.py*sinT*sinF + self.pz*cosF - ]) - out = out.zboost(E=pboost.E,pz=p) + # see here https://physics.stackexchange.com/questions/749036/general-lorentz-boost-of-four-momentum-in-cm-frame-particle-physics + vx = pboost.px/pboost.E + vy = pboost.py/pboost.E + vz = pboost.pz/pboost.E + v = pboost.norm/pboost.E + v2 = pboost.norm_sq/pboost.E**2 + gamma = 1./math.sqrt(1.-v**2) + gammo = gamma-1. 
+ out = FourMomentum(E = gamma*(self.E - vx*self.px - vy*self.py - vz*self.pz), + px= -gamma*vx*self.E + (1+gammo*vx**2/v2)*self.px + gammo*vx*vy/v2*self.py + gammo*vx*vz/v2*self.pz, + py= -gamma*vy*self.E + gammo*vy*vx/v2*self.px + (1+gammo*vy**2/v2)*self.py + gammo*vy*vz/v2*self.pz, + pz= -gamma*vz*self.E + gammo*vz*vx/v2*self.px + gammo*vz*vy/v2*self.py + (1+gammo*vz**2/v2)*self.pz) + return out + def rotate_to_z(self,prot): + + import math + import numpy as np + + z = np.array([0.,0.,1.]) + + px = self.px + py = self.py + pz = self.pz + + refx = prot.px + refy = prot.py + refz = prot.pz + + prot_mom = np.array([px, py, pz]) + ref_mom = np.array([refx, refy, refz]) + + # Create normal vector + n = np.array([refy, -refx, 0.]) + n = n * 1./math.sqrt(self.threedot(n,n)) + t = prot_mom - self.threedot(n,prot_mom)*n + p = ref_mom - self.threedot(ref_mom,z)*z + p = p/math.sqrt(self.threedot(p,p)) + + t_pz = np.array([self.threedot(t,p), self.threedot(t,z), 0.]) + costheta = self.threedot(ref_mom,z)* 1./math.sqrt(self.threedot(ref_mom, ref_mom)) + sintheta=math.sqrt(1.-costheta**2) + + sgn = 1. + t_pz_p = np.array([0., 0., 0.]) + t_pz_p[0] = costheta*t_pz[0] + sgn*(-sintheta) * t_pz[1] + t_pz_p[1] = sgn*sintheta*t_pz[0] + costheta * t_pz[1] + + out_mom = self.threedot(n,prot_mom)*n + t_pz_p[0]*p + t_pz_p[1]*z + + out = FourMomentum([self.E,out_mom[0], out_mom[1], out_mom[2] ] ) + + return out - + def threedot(self,a,b): + + return a[0]*b[0]+a[1]*b[1]+a[2]*b[2] class OneNLOWeight(object): diff --git a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/madevent_interface.py b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/madevent_interface.py index 2a118e21bf..8e30cf690c 100755 --- a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/madevent_interface.py +++ b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/madevent_interface.py @@ -496,7 +496,6 @@ def help_remove(self): logger.info(" the optional '-f' allows to by-pass all security question") logger.info(" The banner can be remove only if all files are removed first.") - class AskRun(cmd.ControlSwitch): """a class for the question on what to do on a madevent run""" @@ -2393,13 +2392,17 @@ def do_generate_events(self, line): # Check argument's validity mode = self.check_generate_events(args) switch_mode = self.ask_run_configuration(mode, args) - if not args: - # No run name assigned -> assigned one automaticaly - self.set_run_name(self.find_available_run_name(self.me_dir), None, 'parton') - else: - self.set_run_name(args[0], None, 'parton', True) - args.pop(0) - + with misc.TMP_variable(banner_mod.RunCard, 'allow_scan', True): + run_card = banner_mod.RunCard(pjoin(self.me_dir, 'Cards', 'run_card.dat'), consistency=False) + if not run_card.scan_set: + if not args: + # No run name assigned -> assigned one automaticaly + self.set_run_name(self.find_available_run_name(self.me_dir), None, 'parton') + else: + self.set_run_name(args[0], None, 'parton', True) + args.pop(0) + + self.run_generate_events(switch_mode, args) self.postprocessing() @@ -2560,7 +2563,7 @@ def wait_monitoring(Idle, Running, Done): self.update_status("postprocessing contur done", level="rivet") # this decorator handle the loop related to scan. 
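# Added note (hedged, not from the original sources): the decorator change just
# below passes run_card_scan=True, so the machinery that loops generate_events
# over scanned param_card values can also iterate over scan values declared in
# the run_card. It pairs with the new block in do_generate_events above, which
# opens the run_card with allow_scan temporarily enabled and only assigns a run
# name directly when run_card.scan_set is empty (presumably the scan handler
# then names the individual runs itself).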
- @common_run.scanparamcardhandling() + @common_run.scanparamcardhandling(run_card_scan=True) def run_generate_events(self, switch_mode, args): if self.proc_characteristics['loop_induced'] and self.options['run_mode']==0: @@ -2593,7 +2596,6 @@ def run_generate_events(self, switch_mode, args): # Regular run mode logger.info('Generating %s events with run name %s' % (self.run_card['nevents'], self.run_name)) - self.exec_cmd('survey %s %s' % (self.run_name,' '.join(args)), postcmd=False) nb_event = self.run_card['nevents'] @@ -2975,7 +2977,7 @@ def update_width_in_param_card(decay_info, initial=None, output=None): particle = 0 # Read BRs for this decay line = param_card[line_number] - while re.search('^(#|\s|\d)', line): + while re.search(r'^(#|\s|\d)', line): line = param_card.pop(line_number) if not particle or line.startswith('#'): line=param_card[line_number] @@ -3226,7 +3228,7 @@ def do_treatcards(self, line, mode=None, opt=None): for line in open(pjoin(bias_module_path,'%s.f'%os.path.basename(bias_module_path))): if start and last: break - if not start and not re.search('c\s*parameters\s*=\s*{',line, re.I): + if not start and not re.search(r'c\s*parameters\s*=\s*{',line, re.I): continue start = True if not line.startswith('C'): @@ -3235,7 +3237,7 @@ def do_treatcards(self, line, mode=None, opt=None): if '{' in line: line = line.split('{')[-1] # split for } ! # - split_result = re.split('(\}|!|\#)', line,1, re.M) + split_result = re.split(r'(\}|!|\#)', line,1, re.M) line = split_result[0] sep = split_result[1] if len(split_result)>1 else None if sep == '}': @@ -3514,8 +3516,8 @@ def pass_in_difficult_integration_mode(self, rate=1): text = open(conf_path).read() min_evt, max_evt = 2500 *(2+rate), 10000*(rate+1) - text = re.sub('''\(min_events = \d+\)''', '(min_events = %i )' % min_evt, text) - text = re.sub('''\(max_events = \d+\)''', '(max_events = %i )' % max_evt, text) + text = re.sub(r'''\(min_events = \d+\)''', '(min_events = %i )' % min_evt, text) + text = re.sub(r'''\(max_events = \d+\)''', '(max_events = %i )' % max_evt, text) fsock = open(conf_path, 'w') fsock.write(text) fsock.close() @@ -3619,7 +3621,7 @@ def do_refine(self, line): alljobs = misc.glob('ajob*', Pdir) #remove associated results.dat (ensure to not mix with all data) - Gre = re.compile("\s*j=(G[\d\.\w]+)") + Gre = re.compile(r"\s*j=(G[\d\.\w]+)") for job in alljobs: Gdirs = Gre.findall(open(job).read()) for Gdir in Gdirs: @@ -3727,58 +3729,126 @@ def do_combine_events(self, line): sum_xsec, sum_xerru, sum_axsec = 0,[],0 Gdirs = self.get_Gdir() Gdirs.sort() - for Gdir in Gdirs: - if os.path.exists(pjoin(Gdir, 'events.lhe')): - result = sum_html.OneResult('') - result.read_results(pjoin(Gdir, 'results.dat')) - sum_xsec += result.get('xsec') - sum_xerru.append(result.get('xerru')) - sum_axsec += result.get('axsec') - - if self.run_card['gridpack'] or self.run_card['nevents']==0: - os.remove(pjoin(Gdir, 'events.lhe')) - continue + partials_info = [] + try: + p = subprocess.Popen(["ulimit", "-n"], stdout=subprocess.PIPE) + out, err = p.communicate() + max_G = out.decode() + if max_G == "unlimited": + max_G =2500 + else: + max_G = int(max_G) - 40 + except Exception as error: + logger.debug(error) + max_G = 80 # max(20, len(Gdirs)/self.options['nb_core']) - AllEvent.add(pjoin(Gdir, 'events.lhe'), - result.get('xsec'), - result.get('xerru'), - result.get('axsec') - ) - - if len(AllEvent) >= 80: #perform a partial unweighting - AllEvent.unweight(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % partials), - 
get_wgt, log_level=5, trunc_error=1e-2, event_target=self.run_card['nevents']) - AllEvent = lhe_parser.MultiEventFile() - AllEvent.banner = self.banner - AllEvent.add(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % partials), - sum_xsec, - math.sqrt(sum(x**2 for x in sum_xerru)), - sum_axsec) - partials +=1 - if not hasattr(self,'proc_characteristic'): self.proc_characteristic = self.get_characteristics() - if len(AllEvent) == 0: - nb_event = 0 - else: + mycluster = cluster.MultiCore(nb_core=self.options['nb_core']) + + def split(a, n): + """split a list "a" into n chunk of same size (or nearly same size)""" + k, m = divmod(len(a), n) + return (a[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n)) + + partials_info = [] + if len(Gdirs) >= max_G: + start_unweight= time.perf_counter() + # first check in how many chunk we have to split (always use a multiple of nb_core) + nb_split = 1 + nb_G = len(Gdirs) // (2* self.options['nb_core']) + while nb_G > min(80, max_G): + nb_split += 1 + nb_G = len(Gdirs)//(nb_split*2*self.options['nb_core']) + if nb_G < 10: + nb_split -=1 + nb_G = len(Gdirs)//(nb_split*2*self.options['nb_core']) + + #enforce at least 10 directory per thread + if nb_G > 10 or nb_split>1: + # do the unweighting of each chunk on their own thread + nb_chunk = (nb_split*2*self.options['nb_core']) + else: + nb_chunk = len(Gdirs) // 10 + nb_G =10 + + # security that the number of combine events is too large + if nb_chunk >= max_G: + nb_chunk = max_G -1 + nb_G = len(Gdirs) // nb_chunk + + for i, local_G in enumerate(split(Gdirs, nb_chunk)): + line = [pjoin(self.me_dir, "Events", self.run_name, "partials%d.lhe.gz" % i)] + line.append(pjoin(self.me_dir, 'Events', self.run_name, '%s_%s_banner.txt' % (self.run_name, tag))) + line.append(str(self.results.current['cross'])) + line += local_G + partials_info.append(self.do_combine_events_partial(' '.join(line), preprocess_only=True)) + mycluster.submit(sys.executable, + [pjoin(self.me_dir, 'bin', 'internal', 'madevent_interface.py'), 'combine_events_partial'] + line, + stdout='/dev/null' + ) + + starttime = time.time() + update_status = lambda idle, run, finish: \ + self.update_status((idle, run, finish, 'unweight'), level=None, + force=False, starttime=starttime) + mycluster.wait(self.me_dir, update_status) + # do the final combination + for data in partials_info: + AllEvent.add(*data) + + start_unweight= time.perf_counter() nb_event = AllEvent.unweight(pjoin(self.me_dir, "Events", self.run_name, "unweighted_events.lhe.gz"), get_wgt, trunc_error=1e-2, event_target=self.run_card['nevents'], log_level=logging.DEBUG, normalization=self.run_card['event_norm'], proc_charac=self.proc_characteristic) + + #cleaning + for data in partials_info: + path = data[0] + try: + os.remove(path) + except Exception as error: + try: + os.remove(path[:-3]) # try without the .gz + except: + misc.sprint('no file ', path, 'to clean') + else: + for Gdir in Gdirs: + if os.path.exists(pjoin(Gdir, 'events.lhe')): + result = sum_html.OneResult('') + result.read_results(pjoin(Gdir, 'results.dat')) + sum_xsec += result.get('xsec') + sum_xerru.append(result.get('xerru')) + sum_axsec += result.get('axsec') + + if self.run_card['gridpack'] or self.run_card['nevents']==0: + os.remove(pjoin(Gdir, 'events.lhe')) + continue + + AllEvent.add(pjoin(Gdir, 'events.lhe'), + result.get('xsec'), + result.get('xerru'), + result.get('axsec') + ) + + if len(AllEvent) == 0: + nb_event = 0 + else: + nb_event = AllEvent.unweight(pjoin(self.me_dir, "Events", self.run_name, 
"unweighted_events.lhe.gz"), + get_wgt, trunc_error=1e-2, event_target=self.run_card['nevents'], + log_level=logging.DEBUG, normalization=self.run_card['event_norm'], + proc_charac=self.proc_characteristic) + + if nb_event < self.run_card['nevents']: + logger.warning("failed to generate enough events. Please follow one of the following suggestions to fix the issue:") + logger.warning(" - set in the run_card.dat 'sde_strategy' to %s", 1 + self.run_card['sde_strategy'] % 2) + logger.warning(" - set in the run_card.dat 'hard_survey' to 1 or 2.") + logger.warning(" - reduce the number of requested events (if set too high)") + logger.warning(" - check that you do not have -integrable- singularity in your amplitude.") + logger.warning(" - regenerate your process directory by selecting another gauge (in particular try FD gauge).") - if nb_event < self.run_card['nevents']: - logger.warning("failed to generate enough events. Please follow one of the following suggestions to fix the issue:") - logger.warning(" - set in the run_card.dat 'sde_strategy' to %s", 1 + self.run_card['sde_strategy'] % 2) - logger.warning(" - set in the run_card.dat 'hard_survey' to 1 or 2.") - logger.warning(" - reduce the number of requested events (if set too high)") - logger.warning(" - check that you do not have -integrable- singularity in your amplitude.") - if partials: - for i in range(partials): - try: - os.remove(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe.gz" % i)) - except Exception: - os.remove(pjoin(self.me_dir, "Events", self.run_name, "partials%s.lhe" % i)) self.results.add_detail('nb_event', nb_event) @@ -3789,7 +3859,50 @@ def do_combine_events(self, line): logger.info("combination of events done in %s s ", time.time()-start) self.to_store.append('event') + + ############################################################################ + def do_combine_events_partial(self, line, preprocess_only=False): + """ """ + + AllEvent = lhe_parser.MultiEventFile() + + sum_xsec, sum_xerru, sum_axsec = 0,[],0 + output, banner_path,cross = line.split()[:3] + Gdirs = line.split()[3:] + + cross = float(cross) + if not self.banner: + self.banner = banner_mod.Banner(banner_path) + if not hasattr(self, 'run_card'): + self.run_card = banner_mod.RunCard(self.banner['mgruncard']) + AllEvent.banner = self.banner + + for Gdir in Gdirs: + if os.path.exists(pjoin(Gdir, 'events.lhe')): + result = sum_html.OneResult('') + result.read_results(pjoin(Gdir, 'results.dat')) + sum_xsec += result.get('xsec') + sum_xerru.append(result.get('xerru')) + sum_axsec += result.get('axsec') + + if self.run_card['gridpack'] or self.run_card['nevents']==0: + os.remove(pjoin(Gdir, 'events.lhe')) + continue + if not preprocess_only: + AllEvent.add(pjoin(Gdir, 'events.lhe'), + result.get('xsec'), + result.get('xerru'), + result.get('axsec') + ) + if preprocess_only: + return output, sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), sum_axsec + nb_event = max(min(abs(1.01*self.run_card['nevents']*sum_axsec/cross),self.run_card['nevents']), 10) + get_wgt = lambda event: event.wgt + AllEvent.unweight(output, + get_wgt, log_level=5, trunc_error=1e-2, event_target=nb_event) + return output, sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), sum_axsec + ############################################################################ def correct_bias(self): """check the first event and correct the weight by the bias @@ -3902,13 +4015,19 @@ def do_store_events(self, line): #except Exception: # continue # Store log - try: - if 
os.path.exists(pjoin(G_path, 'log.txt')): - input = pjoin(G_path, 'log.txt') + input = pjoin(G_path, 'log.txt') + if os.path.exists(input): + if self.run_card['keep_log'] not in ["none", "minimal"]: output = pjoin(G_path, '%s_log.txt' % run) - files.mv(input, output) - except Exception: - continue + try: + files.mv(input, output) + except Exception: + continue + elif self.run_card['keep_log'] == "none": + try: + os.remove(input) + except Exception: + continue #try: # # Grid # for name in ['ftn26']: @@ -3989,7 +4108,7 @@ def do_create_gridpack(self, line): misc.call(['./bin/internal/make_gridpack'], cwd=self.me_dir) files.mv(pjoin(self.me_dir, 'gridpack.tar.gz'), pjoin(self.me_dir, '%s_gridpack.tar.gz' % self.run_name)) - os.system("sed -i.bak \"s/\s*.true.*=.*GridRun/ .false. = GridRun/g\" %s/Cards/grid_card.dat" \ + os.system("sed -i.bak \"s/\\s*.true.*=.*GridRun/ .false. = GridRun/g\" %s/Cards/grid_card.dat" \ % self.me_dir) self.update_status('gridpack created', level='gridpack') @@ -4476,7 +4595,7 @@ def do_pythia8(self, line): else: preamble = misc.get_HEPTools_location_setter( pjoin(MG5DIR,'HEPTools'),'lib') - preamble += "\n unset PYTHIA8DATA\n" + #preamble += "\n unset PYTHIA8DATA\n" open(pythia_cmd_card,'w').write("""! ! It is possible to run this card manually with: @@ -4691,7 +4810,7 @@ def do_pythia8(self, line): # Make sure to sure the number of split_events determined during the splitting. split_PY8_Card.systemSet('Main:numberOfEvents',partition_for_PY8[i]) split_PY8_Card.systemSet('HEPMCoutput:scaling',split_PY8_Card['HEPMCoutput:scaling']* - (float(partition_for_PY8[i])/float(n_events))) + (float(partition_for_PY8[i]))) # Add_missing set to False so as to be sure not to add any additional parameter w.r.t # the ones in the original PY8 param_card copied. split_PY8_Card.write(pjoin(parallelization_dir,'PY8Card_%d.dat'%i), @@ -4963,9 +5082,9 @@ def wait_monitoring(Idle, Running, Done): if cross_sections: # Filter the cross_sections specified an keep only the ones # with central parameters and a different merging scale - a_float_re = '[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' + a_float_re = r'[\+|-]?\d+(\.\d*)?([EeDd][\+|-]?\d+)?' central_merging_re = re.compile( - '^\s*Weight_MERGING\s*=\s*(?P%s)\s*$'%a_float_re, + r'^\s*Weight_MERGING\s*=\s*(?P%s)\s*$'%a_float_re, re.IGNORECASE) cross_sections = dict( (float(central_merging_re.match(xsec).group('merging')),value) @@ -5016,8 +5135,8 @@ def wait_monitoring(Idle, Running, Done): def parse_PY8_log_file(self, log_file_path): """ Parse a log file to extract number of event and cross-section. 
""" - pythiare = re.compile("Les Houches User Process\(es\)\s*\d+\s*\|\s*(?P\d+)\s*(?P\d+)\s*(?P\d+)\s*\|\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") - pythia_xsec_re = re.compile("Inclusive cross section\s*:\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") + pythiare = re.compile(r"Les Houches User Process\(es\)\s*\d+\s*\|\s*(?P\d+)\s*(?P\d+)\s*(?P\d+)\s*\|\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") + pythia_xsec_re = re.compile(r"Inclusive cross section\s*:\s*(?P[\d\.e\-\+]+)\s*(?P[\d\.e\-\+]+)") sigma_m, Nacc, Ntry = None, None, None for line in misc.BackRead(log_file_path): info = pythiare.search(line) @@ -5158,7 +5277,7 @@ def do_pythia(self, line): # read the line from the bottom of the file #pythia_log = misc.BackRead(pjoin(self.me_dir,'Events', self.run_name, # '%s_pythia.log' % tag)) - pythiare = re.compile("\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") + pythiare = re.compile(r"\s*I\s+0 All included subprocesses\s+I\s+(?P\d+)\s+(?P\d+)\s+I\s+(?P[\d\.D\-+]+)\s+I") for line in misc.reverse_readline(pjoin(self.me_dir,'Events', self.run_name, '%s_pythia.log' % tag)): info = pythiare.search(line) @@ -5619,8 +5738,8 @@ def launch_job(self,exe, cwd=None, stdout=None, argument = [], remaining=0, input_files.append(self.get_pdf_input_filename()) #Find the correct ajob - Gre = re.compile("\s*j=(G[\d\.\w]+)") - origre = re.compile("grid_directory=(G[\d\.\w]+)") + Gre = re.compile(r"\s*j=(G[\d\.\w]+)") + origre = re.compile(r"grid_directory=(G[\d\.\w]+)") try : fsock = open(exe) except Exception: @@ -5628,7 +5747,7 @@ def launch_job(self,exe, cwd=None, stdout=None, argument = [], remaining=0, text = fsock.read() output_files = Gre.findall(text) if not output_files: - Ire = re.compile("for i in ([\d\.\s]*) ; do") + Ire = re.compile(r"for i in ([\d\.\s]*) ; do") data = Ire.findall(text) data = ' '.join(data).split() for nb in data: @@ -6035,7 +6154,7 @@ def get_last_tag(self, level): 'syscalc':[], 'rivet':['rivet']} - if name == self.run_name: + if name and name == self.run_name: if reload_card: run_card = pjoin(self.me_dir, 'Cards','run_card.dat') self.run_card = banner_mod.RunCard(run_card) @@ -6334,7 +6453,7 @@ def run_syscalc(self, mode='parton', event_path=None, output=None): elif mode == 'Pythia': stdout = open(pjoin(event_dir, self.run_name, '%s_%s_syscalc.log' % (tag,mode)),'w') if 'mgpythiacard' in self.banner: - pat = re.compile('''^\s*qcut\s*=\s*([\+\-\d.e]*)''', re.M+re.I) + pat = re.compile(r'''^\s*qcut\s*=\s*([\+\-\d.e]*)''', re.M+re.I) data = pat.search(self.banner['mgpythiacard']) if data: qcut = float(data.group(1)) @@ -6611,7 +6730,7 @@ def get_subP_ids(path): for line in open(pjoin(path, 'leshouche.inc')): if not 'IDUP' in line: continue - particles = re.search("/([\d,-]+)/", line) + particles = re.search(r"/([\d,-]+)/", line) all_ids.append([int(p) for p in particles.group(1).split(',')]) return all_ids @@ -6899,6 +7018,7 @@ def do_combine_events(self, line): partials = 0 # if too many file make some partial unweighting sum_xsec, sum_xerru, sum_axsec = 0,[],0 + partials_info = [] Gdirs = self.get_Gdir() Gdirs.sort() for Gdir in Gdirs: @@ -6917,16 +7037,21 @@ def do_combine_events(self, line): sum_axsec += result.get('axsec')*gscalefact[Gdir] if len(AllEvent) >= 80: #perform a partial unweighting + nb_event = min(abs(1.01*self.nb_event*sum_axsec/self.results.current['cross']),self.run_card['nevents']) AllEvent.unweight(pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), - get_wgt, log_level=5, trunc_error=1e-2, event_target=self.nb_event) + 
get_wgt, log_level=5, trunc_error=1e-2, event_target=nb_event) AllEvent = lhe_parser.MultiEventFile() AllEvent.banner = self.banner - AllEvent.add(pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), + partials_info.append((pjoin(outdir, self.run_name, "partials%s.lhe.gz" % partials), sum_xsec, math.sqrt(sum(x**2 for x in sum_xerru)), - sum_axsec) + sum_axsec) ) + sum_xsec, sum_xerru, sum_axsec = 0,[],0 partials +=1 + for data in partials_info: + AllEvent.add(*data) + if not hasattr(self,'proc_characteristic'): self.proc_characteristic = self.get_characteristics() diff --git a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/misc.py b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/misc.py index c4c669f36b..e7fd60be0d 100755 --- a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/misc.py +++ b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/misc.py @@ -67,7 +67,7 @@ def parse_info_str(fsock): """ info_dict = {} - pattern = re.compile("(?P\w*)\s*=\s*(?P.*)", + pattern = re.compile(r"(?P\w*)\s*=\s*(?P.*)", re.IGNORECASE | re.VERBOSE) for entry in fsock: entry = entry.strip() @@ -84,7 +84,7 @@ def parse_info_str(fsock): def glob(name, path=''): """call to glob.glob with automatic security on path""" import glob as glob_module - path = re.sub('(?P\?|\*|\[|\])', '[\g]', path) + path = re.sub(r'(?P\?|\*|\[|\])', r'[\g]', path) return glob_module.glob(pjoin(path, name)) #=============================================================================== @@ -614,10 +614,10 @@ def mod_compilator(directory, new='gfortran', current=None, compiler_type='gfort #search file file_to_change=find_makefile_in_dir(directory) if compiler_type == 'gfortran': - comp_re = re.compile('^(\s*)FC\s*=\s*(.+)\s*$') + comp_re = re.compile(r'^(\s*)FC\s*=\s*(.+)\s*$') var = 'FC' elif compiler_type == 'cpp': - comp_re = re.compile('^(\s*)CXX\s*=\s*(.+)\s*$') + comp_re = re.compile(r'^(\s*)CXX\s*=\s*(.+)\s*$') var = 'CXX' else: MadGraph5Error, 'Unknown compiler type: %s' % compiler_type @@ -861,9 +861,9 @@ def detect_current_compiler(path, compiler_type='fortran'): # comp = re.compile("^\s*FC\s*=\s*(\w+)\s*") # The regular expression below allows for compiler definition with absolute path if compiler_type == 'fortran': - comp = re.compile("^\s*FC\s*=\s*([\w\/\\.\-]+)\s*") + comp = re.compile("^\\s*FC\\s*=\\s*([\\w\\/\\.\\-]+)\\s*") elif compiler_type == 'cpp': - comp = re.compile("^\s*CXX\s*=\s*([\w\/\\.\-]+)\s*") + comp = re.compile("^\\s*CXX\\s*=\\s*([\\w\\/\\.\\-]+)\\s*") else: MadGraph5Error, 'Unknown compiler type: %s' % compiler_type @@ -1001,7 +1001,19 @@ def call_stdout(arg, *args, **opt): def copytree(src, dst, symlinks = False, ignore = None): if not os.path.exists(dst): os.makedirs(dst) - shutil.copystat(src, dst) + try: + shutil.copystat(src, dst) + except PermissionError: + if os.path.realpath(src).startswith('/cvmfs') and os.path.realpath(dst).startswith('/afs'): + # allowing missmatch from cvmfs to afs since sounds to not create issue --at least in general-- + logger.critical(f'Ignoring that we could not copy permissions from {src} to {dst}') + else: + logger.critical(f'Permission error detected from {src} to {dst}.\n'+\ + 'If you are using WSL with windows partition, please try using python3.12\n'+\ + 'or avoid moving your data from the WSL partition to the UNIX one') + # we do not have enough experience in WSL to allow it to get trough. + raise + lst = os.listdir(src) if ignore: excl = ignore(src, lst) @@ -1895,12 +1907,12 @@ class EasterEgg(object): May4_banner = "* _____ *\n" + \ "* ,-~\" \"~-. *\n" + \ "* * ,^ ___ ^. 
* *\n" + \ - "* * / .^ ^. \ * *\n" + \ + "* * / .^ ^. \\ * *\n" + \ "* * Y l o ! Y * *\n" + \ "* * l_ `.___.' _,[ * *\n" + \ "* * |^~\"--------------~\"\"^| * *\n" + \ "* * ! May the 4th ! * *\n" + \ - "* * \ / * *\n" + \ + "* * \\ / * *\n" + \ "* * ^. .^ * *\n" + \ "* * \"-.._____.,-\" * *\n" @@ -1909,13 +1921,13 @@ class EasterEgg(object): "* M::::::::::M M::::::::::M *\n" + \ "* M:::::::::::M M:::::::::::M (_)___ *\n" + \ "* M:::::::M::::M M::::M:::::::M | / __| *\n" + \ - "* M::::::M M::::M M::::M M::::::M | \__ \ *\n" + \ + "* M::::::M M::::M M::::M M::::::M | \\__ \\ *\n" + \ "* M::::::M M::::M::::M M::::::M |_|___/ *\n" + \ "* M::::::M M:::::::M M::::::M *\n" + \ "* M::::::M M:::::M M::::::M / _| ___ _ __ *\n" + \ - "* M::::::M MMMMM M::::::M | |_ / _ \| '__| *\n" + \ + "* M::::::M MMMMM M::::::M | |_ / _ \\| '__| *\n" + \ "* M::::::M M::::::M | _| (_) | | *\n" + \ - "* M::::::M M::::::M |_/\/\___/|_| *\n" + \ + "* M::::::M M::::::M |_/\\/\\___/|_| *\n" + \ "* M::::::M M::::::M *\n" + \ "* MMMMMMMM MMMMMMMM *\n" + \ "* *\n" + \ @@ -2233,39 +2245,51 @@ def import_python_lhapdf(lhapdfconfig): os.environ['LD_LIBRARY_PATH'] = lhapdf_libdir else: os.environ['LD_LIBRARY_PATH'] = '%s:%s' %(lhapdf_libdir,os.environ['LD_LIBRARY_PATH']) - try: candidates=[dirname for dirname in os.listdir(lhapdf_libdir) \ if os.path.isdir(os.path.join(lhapdf_libdir,dirname))] except OSError: candidates=[] + if os.path.isdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')): + candidates += [pjoin(os.pardir,'local', 'lib', dirname) for dirname in os.listdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')) + if os.path.isdir(os.path.join(lhapdf_libdir,os.pardir, 'local', 'lib', dirname))] for candidate in candidates: - if os.path.isdir(os.path.join(lhapdf_libdir,candidate,'site-packages')): - sys.path.insert(0,os.path.join(lhapdf_libdir,candidate,'site-packages')) - try: - import lhapdf - use_lhapdf=True - break - except ImportError: - sys.path.pop(0) - continue + for subdir in ['site-packages', 'dist-packages']: + if os.path.isdir(os.path.join(lhapdf_libdir,candidate, subdir)): + sys.path.insert(0,os.path.join(lhapdf_libdir,candidate, subdir)) + try: + import lhapdf + use_lhapdf=True + break + except ImportError as error: + sys.path.pop(0) + continue + else: + continue + break if not use_lhapdf: try: candidates=[dirname for dirname in os.listdir(lhapdf_libdir+'64') \ if os.path.isdir(os.path.join(lhapdf_libdir+'64',dirname))] except OSError: candidates=[] - + if os.path.isdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib64')): + candidates += [pjoin(os.pardir,'local', 'lib64', dirname) for dirname in os.listdir(pjoin(lhapdf_libdir, os.pardir, 'local', 'lib')) + if os.path.isdir(os.path.join(lhapdf_libdir,os.pardir, 'local', 'lib64', dirname))] for candidate in candidates: - if os.path.isdir(os.path.join(lhapdf_libdir+'64',candidate,'site-packages')): - sys.path.insert(0,os.path.join(lhapdf_libdir+'64',candidate,'site-packages')) - try: - import lhapdf - use_lhapdf=True - break - except ImportError: - sys.path.pop(0) - continue + for subdir in ['site-packages', 'dist-packages']: + if os.path.isdir(os.path.join(lhapdf_libdir+'64',candidate, subdir)): + sys.path.insert(0,os.path.join(lhapdf_libdir+'64',candidate, subdir)) + try: + import lhapdf + use_lhapdf=True + break + except ImportError as error: + sys.path.pop(0) + continue + else: + continue + break if not use_lhapdf: try: import lhapdf diff --git a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/shower_card.py 
b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/shower_card.py index e87d534177..16ed72b1a0 100755 --- a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/shower_card.py +++ b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/shower_card.py @@ -288,7 +288,7 @@ def write(self, output_file, template=None, python_template=False, self.text = open(template,'r').read() - key_re = re.compile('^(\s*)([\S^#]+)(\s*)=(\s*)([^#]*?)(\s*)(\#.*|$)') + key_re = re.compile(r'^(\s*)([\S^#]+)(\s*)=(\s*)([^#]*?)(\s*)(\#.*|$)') newlines = [] for line in self.text.split('\n'): key_match = key_re.findall(line) diff --git a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/systematics.py b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/systematics.py index 28eaed00e2..2acebd087c 100644 --- a/epochX/cudacpp/susy_gg_tt.mad/bin/internal/systematics.py +++ b/epochX/cudacpp/susy_gg_tt.mad/bin/internal/systematics.py @@ -582,7 +582,7 @@ def print_cross_sections(self, all_cross, nb_event, stdout): else: resume.write( '#PDF %s: %g +%2.3g%% -%2.3g%%\n' % (pdfset.name, pdferr.central,pdferr.errplus*100/all_cross[0], pdferr.errminus*100/all_cross[0])) - dyn_name = {1: '\sum ET', 2:'\sum\sqrt{m^2+pt^2}', 3:'0.5 \sum\sqrt{m^2+pt^2}',4:'\sqrt{\hat s}' } + dyn_name = {1: r'\sum ET', 2:r'\sum\sqrt{m^2+pt^2}', 3:r'0.5 \sum\sqrt{m^2+pt^2}',4:r'\sqrt{\hat s}' } for key, curr in dyns.items(): if key ==-1: continue @@ -789,7 +789,7 @@ def get_id(self): return int(self.start_wgt_id) if 'initrwgt' in self.banner: - pattern = re.compile(' set lhapdf /PATH/TO/lhapdf-config -None does not seem to correspond to a valid lhapdf-config executable. -Please set the 'lhapdf' variable to the (absolute) /PATH/TO/lhapdf-config (including lhapdf-config). -Note that you can still compile and run aMC@NLO with the built-in PDFs - MG5_aMC> set lhapdf /PATH/TO/lhapdf-config - Using default text editor "vi". Set another one in ./input/mg5_configuration.txt Using default eps viewer "evince". Set another one in ./input/mg5_configuration.txt Using default web browser "firefox". Set another one in ./input/mg5_configuration.txt -import /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt.mg +import /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt.mg The import format was not given, so we guess it as command set stdout_level DEBUG set output information to level: 10 set zerowidth_tchannel F import model MSSM_SLHA2 +INFO: load particles +INFO: load vertices +DEBUG: model prefixing takes 0.9339261054992676  INFO: Restrict model MSSM_SLHA2 with file models/MSSM_SLHA2/restrict_default.dat . INFO: Detect SLHA2 format. keeping restricted parameter in the param_card DEBUG: Simplifying conditional expressions  @@ -554,17 +552,16 @@ INFO: Please specify coupling orders to bypass this step. INFO: Trying coupling order WEIGHTED<=2: WEIGTHED IS QCD+2*QED INFO: Trying process: g g > t t~ WEIGHTED<=2 @1 INFO: Process has 3 diagrams -1 processes with 3 diagrams generated in 0.119 s +1 processes with 3 diagrams generated in 0.112 s Total: 1 processes with 3 diagrams output standalone_cudacpp ../TMPOUT/CODEGEN_cudacpp_susy_gg_tt Load PLUGIN.CUDACPP_OUTPUT -Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.5.3_lo_vect. +Plugin PLUGIN.CUDACPP_OUTPUT has marked as NOT being validated with this version: 3.6.0_lo_vect. 
It has been validated for the last time with version: 3.5.2 Output will be done with PLUGIN: CUDACPP_OUTPUT -DEBUG: cformat =  plugin [export_cpp.py at line 3070]  DEBUG: Entering PLUGIN_ProcessExporter.__init__ (initialise the exporter) [output.py at line 165]  DEBUG: Entering PLUGIN_ProcessExporter.copy_template (initialise the directory) [output.py at line 170]  -INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt +INFO: Creating subdirectories in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt INFO: Organizing processes into subprocess groups INFO: Generating Helas calls for process: g g > t t~ WEIGHTED<=2 @1 INFO: Processing color information for process: g g > t t~ @1 @@ -573,30 +570,30 @@ INFO: Processing color information for process: g g > t t~ @1 DEBUG: type(fortran_model)= [output.py at line 214]  DEBUG: type(me)= me=0 [output.py at line 215]  DEBUG: "need to link", self.to_link_in_P =  need to link ['nvtx.h', 'timer.h', 'timermap.h', 'ompnumthreads.h', 'GpuRuntime.h', 'GpuAbstraction.h', 'MemoryAccessHelpers.h', 'MemoryAccessVectors.h', 'MemoryAccessMatrixElements.h', 'MemoryAccessMomenta.h', 'MemoryAccessRandomNumbers.h', 'MemoryAccessWeights.h', 'MemoryAccessAmplitudes.h', 'MemoryAccessWavefunctions.h', 'MemoryAccessGs.h', 'MemoryAccessCouplingsFixed.h', 'MemoryAccessNumerators.h', 'MemoryAccessDenominators.h', 'MemoryAccessChannelIds.h', 'EventStatistics.h', 'CommonRandomNumbers.h', 'CrossSectionKernels.cc', 'CrossSectionKernels.h', 'MatrixElementKernels.cc', 'MatrixElementKernels.h', 'RamboSamplingKernels.cc', 'RamboSamplingKernels.h', 'RandomNumberKernels.h', 'CommonRandomNumberKernel.cc', 'CurandRandomNumberKernel.cc', 'HiprandRandomNumberKernel.cc', 'Bridge.h', 'BridgeKernels.cc', 'BridgeKernels.h', 'fbridge.cc', 'fbridge.inc', 'fsampler.cc', 'fsampler.inc', 'MadgraphTest.h', 'runTest.cc', 'testmisc.cc', 'testxxx_cc_ref.txt', 'valgrind.h', 'cudacpp.mk', 'testxxx.cc', 'MemoryBuffers.h', 'MemoryAccessCouplings.h', 'perf.py', 'profile.sh'] [output.py at line 216]  -INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/./CPPProcess.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/./CPPProcess.cc -INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/. +INFO: Creating files in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/./CPPProcess.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/./CPPProcess.cc +INFO: Created files CPPProcess.h and CPPProcess.cc in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/SubProcesses/P1_Sigma_MSSM_SLHA2_gg_ttx/. 
Generated helas calls for 1 subprocesses (3 diagrams) in 0.006 s ALOHA: aloha starts to compute helicity amplitudes ALOHA: aloha creates VVV1 set of routines with options: P0 ALOHA: aloha creates FFV1 routines -ALOHA: aloha creates 2 routines in 0.138 s +ALOHA: aloha creates 2 routines in 0.150 s VVV1 FFV1 FFV1 FFV1 -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/./HelAmps_MSSM_SLHA2.h -INFO: Created file HelAmps_MSSM_SLHA2.h in directory /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/./HelAmps_MSSM_SLHA2.h +INFO: Created file HelAmps_MSSM_SLHA2.h in directory /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. super_write_set_parameters_onlyfixMajorana (hardcoded=False) super_write_set_parameters_onlyfixMajorana (hardcoded=True) -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/./Parameters_MSSM_SLHA2.h -FileWriter for /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/./Parameters_MSSM_SLHA2.cc +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/./Parameters_MSSM_SLHA2.h +FileWriter for /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/./Parameters_MSSM_SLHA2.cc INFO: Created files Parameters_MSSM_SLHA2.h and Parameters_MSSM_SLHA2.cc in directory -INFO: /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuX/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. +INFO: /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. and /data/avalassi/GPU2023/madgraph4gpuBis/MG5aMC/TMPOUT/CODEGEN_cudacpp_susy_gg_tt/src/. 
quit -real 0m1.318s -user 0m1.225s -sys 0m0.058s -Code generation completed in 1 seconds +real 0m2.452s +user 0m2.316s +sys 0m0.082s +Code generation completed in 3 seconds diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index 7e3c901212..0c0dfe85e2 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum - +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-09-02_06:58:49 +DATE: 2024-09-15_12:24:25 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09367 [9.3673952392424348E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3837 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.6900s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6821s - [COUNTERS] Fortran MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.03E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7657s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7582s + [COUNTERS] Fortran MEs ( 1 ) : 0.0075s for 8192 events => throughput is 1.09E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09367 [9.3673952392424348E-002] fbridge_mode=0 - [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1766s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1688s - [COUNTERS] Fortran MEs ( 1 ) : 0.0078s for 8192 events => throughput is 1.06E+06 events/s + [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.2200s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2125s + [COUNTERS] Fortran MEs ( 1 ) : 0.0075s for 8192 events => throughput is 1.09E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09155 [9.1552612798370570E-002] fbridge_mode=0 - [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3754s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2924s - [COUNTERS] Fortran MEs ( 1 ) : 0.0830s for 90112 events => throughput is 1.09E+06 events/s + [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] 
fbridge_mode=0 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 0.7343s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6595s + [COUNTERS] Fortran MEs ( 1 ) : 0.0749s for 81920 events => throughput is 1.09E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,16 +132,16 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09367 [9.3673952392424320E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1770s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1695s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0072s for 8192 events => throughput is 1.14E+06 events/s + [XSECTION] Cross section = 0.09243 [9.2432789448173944E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.2319s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2242s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0074s for 8192 events => throughput is 1.11E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3673952392424348E-002) and cpp (9.3673952392424320E-002) differ by less than 3E-14 (3.3306690738754696e-16) +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789448173944E-002) differ by less than 3E-14 (4.440892098500626e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,23 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' -DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09155 [9.1552612798370556E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3701s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2924s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0774s for 90112 events => throughput is 1.16E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 0.09171 [9.1711103909519906E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 0.7234s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6509s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0723s for 81920 events => throughput is 1.13E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1552612798370570E-002) and cpp (9.1552612798370556E-002) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519906E-002) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -185,12 +185,12 @@ OK! 
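(Illustrative aside, not part of the logged output: the "differ by less than 3E-14" checks above compare the Fortran and cudacpp cross sections via a relative difference. A minimal Python sketch of such a check follows; the helper name and the exact formula are assumptions for illustration, not the tmad scripts' actual code.)

# Minimal sketch of a relative-difference xsec check (assumed formula;
# the real comparison scripts may differ in detail).
def rel_diff(a: float, b: float) -> float:
    return abs(a - b) / abs(a)

xsec_fortran = 9.2432789448173985e-002  # from the Fortran x1 run above
xsec_cpp     = 9.2432789448173944e-002  # from the cppnone x1 run above
delta = rel_diff(xsec_fortran, xsec_cpp)
assert delta < 3e-14  # double precision: agreement to a few ULPs (~4.4e-16 here)
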
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.163883e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.158974e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.178699e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.174895e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -212,16 +212,16 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09367 [9.3673952392424348E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1755s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1709s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0043s for 8192 events => throughput is 1.92E+06 events/s + [XSECTION] Cross section = 0.09243 [9.2432789448173944E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.2302s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2254s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0045s for 8192 events => throughput is 1.81E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3673952392424348E-002) and cpp (9.3673952392424348E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789448173944E-002) differ by less than 3E-14 (4.440892098500626e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -240,23 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' -DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09155 [9.1552612798370556E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3392s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2924s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0465s for 90112 events => throughput is 1.94E+06 events/s + [XSECTION] Cross section = 0.09171 [9.1711103909519906E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 0.6926s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6487s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0436s for 81920 events => throughput is 1.88E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1552612798370570E-002) and cpp (9.1552612798370556E-002) differ by less than 3E-14 (1.1102230246251565e-16) +OK! 
xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519906E-002) differ by less than 3E-14 (2.220446049250313e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.935692e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.953684e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.000716e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.997304e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -292,16 +292,16 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09367 [9.3673952392424334E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1752s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1714s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.36E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.2278s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2239s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0036s for 8192 events => throughput is 2.27E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3673952392424348E-002) and cpp (9.3673952392424334E-002) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789448173971E-002) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -320,23 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
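(Illustrative aside: the [COUNTERS] throughput figures are consistent with the event count divided by the ME time. A quick check with numbers copied from the sse4 x1 run above; the formula is inferred from the log layout, not taken from the harness source.)

# Throughput as printed in the [COUNTERS] lines (inferred formula).
nevents = 8192
me_seconds = 0.0045  # "CudaCpp MEs ( 2 )" time for the sse4 x1 run above
print(f"{nevents / me_seconds:.2e} events/s")  # ~1.82E+06, vs "1.81E+06" in
# the log, which uses the unrounded time rather than the displayed 0.0045s
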
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' -DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09155 [9.1552612798370570E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3264s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2914s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0347s for 90112 events => throughput is 2.59E+06 events/s + [XSECTION] Cross section = 0.09171 [9.1711103909519906E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 0.6872s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6539s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0330s for 81920 events => throughput is 2.48E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1552612798370570E-002) and cpp (9.1552612798370570E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519906E-002) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.591750e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.566849e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.740843e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.686525e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -372,16 +372,16 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09367 [9.3673952392424334E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1741s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1705s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.50E+06 events/s + [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.2327s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2291s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.49E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3673952392424348E-002) and cpp (9.3673952392424334E-002) differ by less than 3E-14 (1.1102230246251565e-16) +OK! 
xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789448173971E-002) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -400,23 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' -DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09155 [9.1552612798370570E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3304s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2956s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0344s for 90112 events => throughput is 2.62E+06 events/s + [XSECTION] Cross section = 0.09171 [9.1711103909519906E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 0.6890s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6567s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0321s for 81920 events => throughput is 2.55E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1552612798370570E-002) and cpp (9.1552612798370570E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519906E-002) differ by less than 3E-14 (2.220446049250313e-16) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -425,12 +425,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.676787e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.680421e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.817332e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.822232e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -452,16 +452,16 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09367 [9.3673952392424334E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1759s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1713s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0041s for 8192 events => throughput is 1.98E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.2235s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2189s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0042s for 8192 events => throughput is 1.94E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3673952392424348E-002) and cpp (9.3673952392424334E-002) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789448173971E-002) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -480,23 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' -DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09155 [9.1552612798370570E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3364s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2951s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0410s for 90112 events => throughput is 2.20E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 0.6871s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6460s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0410s for 81920 events => throughput is 2.00E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (9.1552612798370570E-002) and cpp (9.1552612798370570E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103909519892E-002) differ by less than 3E-14 (0.0) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.088508e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.142328e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.205867e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.216839e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -532,16 +532,16 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09367 [9.3673952392424348E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.6094s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6059s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.87E+06 events/s + [XSECTION] Cross section = 0.09243 [9.2432789448173971E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.6648s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6609s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.48E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3673952392424348E-002) and cuda (9.3673952392424348E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (9.2432789448173985E-002) and cuda (9.2432789448173971E-002) differ by less than 3E-14 (1.1102230246251565e-16) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -560,23 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
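(Illustrative aside: the x10 runs in the new logs process 81920 events where the old logs processed 90112; both are whole multiples of the 8192-event vector size reported in the VECSIZE_USED lines. The batching interpretation below is an assumption based on those lines.)

# Both x10 event counts are whole batches of VECSIZE_USED = 8192
# (assumption: events are processed in vector-size batches).
vecsize = 8192
assert 81920 == 10 * vecsize  # new logs: "MEK processed 81920 events"
assert 90112 == 11 * vecsize  # old logs: "MEK processed 90112 events"
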
-------------------- Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' -DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09155 [9.1552612798370570E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.7304s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7222s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0076s for 90112 events => throughput is 1.19E+07 events/s + [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 1.1813s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1726s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0081s for 81920 events => throughput is 1.01E+07 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1552612798370570E-002) and cuda (9.1552612798370570E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (9.1711103909519892E-002) and cuda (9.1711103909519892E-002) differ by less than 3E-14 (0.0) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.361738e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.861787e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.715459e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.181397e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.560694e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.226375e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.907283e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.783312e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.551933e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.210512e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.903581e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.795717e+08 ) 
sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.557040e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.200044e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.178676e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.159916e+08 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index 10a885fc0b..a35b30e9f9 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum make USEBUILDDIR=1 BACKEND=cuda +make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-09-02_06:59:05 +DATE: 2024-09-15_12:24:44 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09367 [9.3673952392424348E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3837 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.7030s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6952s - [COUNTERS] Fortran MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.04E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7477s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7400s + [COUNTERS] Fortran MEs ( 1 ) : 0.0076s for 8192 events => throughput is 1.07E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09367 [9.3673952392424348E-002] fbridge_mode=0 - [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1803s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1724s - [COUNTERS] Fortran MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.03E+06 events/s + [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.2210s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2132s + [COUNTERS] Fortran MEs ( 1 ) : 0.0077s for 8192 events => throughput is 1.06E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** 
-------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09155 [9.1552612798370570E-002] fbridge_mode=0 - [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3742s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2910s - [COUNTERS] Fortran MEs ( 1 ) : 0.0832s for 90112 events => throughput is 1.08E+06 events/s + [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] fbridge_mode=0 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 0.7367s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6615s + [COUNTERS] Fortran MEs ( 1 ) : 0.0752s for 81920 events => throughput is 1.09E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,16 +132,16 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09367 [9.3673940164823388E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1742s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1669s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0071s for 8192 events => throughput is 1.16E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [XSECTION] Cross section = 0.09243 [9.2432777382586498E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.2351s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2281s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0068s for 8192 events => throughput is 1.21E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3673952392424348E-002) and cpp (9.3673940164823388E-002) differ by less than 4E-4 (1.3053362912796018e-07) +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432777382586498E-002) differ by less than 4E-4 (1.305336294610271e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,23 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
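(Illustrative aside: for the single-precision "f" builds the same style of check runs with a much looser 4E-4 tolerance. Plugging in the numbers from the cppnone x1 run above reproduces the logged ~1.3e-07 relative difference; the formula is the same assumption as in the double-precision sketch earlier.)

# Same assumed relative-difference check, single-precision build.
xsec_fortran = 9.2432789448173985e-002  # double-precision Fortran reference
xsec_cpp_flt = 9.2432777382586498e-002  # single-precision cudacpp result
rel = abs(xsec_fortran - xsec_cpp_flt) / abs(xsec_fortran)
assert rel < 4e-4  # ~1.3e-07 here: float rounding, far below the tolerance
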
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' -DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09155 [9.1552600830551153E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3620s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2884s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0734s for 90112 events => throughput is 1.23E+06 events/s + [XSECTION] Cross section = 0.09171 [9.1711091925143637E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 0.7204s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6521s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0681s for 81920 events => throughput is 1.20E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1552612798370570E-002) and cpp (9.1552600830551153E-002) differ by less than 4E-4 (1.3072067583941305e-07) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711091925143637E-002) differ by less than 4E-4 (1.3067530257870885e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.224592e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.224241e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.232559e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.240015e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -212,16 +212,16 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09367 [9.3673937587540376E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1700s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1671s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 3.06E+06 events/s + [XSECTION] Cross section = 0.09243 [9.2432774839452045E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.2276s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2246s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.91E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3673952392424348E-002) and cpp (9.3673937587540376E-002) differ by less than 4E-4 (1.5804696607002455e-07) +OK! 
xsec from fortran (9.2432789448173985E-002) and cpp (9.2432774839452045E-002) differ by less than 4E-4 (1.5804696607002455e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -240,23 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' -DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09155 [9.1552598352995826E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3198s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2901s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0295s for 90112 events => throughput is 3.06E+06 events/s + [XSECTION] Cross section = 0.09171 [9.1711089416628339E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 0.6836s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6563s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0271s for 81920 events => throughput is 3.02E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1552612798370570E-002) and cpp (9.1552598352995826E-002) differ by less than 4E-4 (1.5778222273166165e-07) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711089416628339E-002) differ by less than 4E-4 (1.5802766439865223e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -265,12 +265,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.112816e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.177757e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.235322e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.232799e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -292,16 +292,16 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09367 [9.3673937665039383E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1723s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1695s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0025s for 8192 events => throughput is 3.25E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [XSECTION] Cross section = 0.09243 [9.2432774915924193E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.2259s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2232s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.16E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3673952392424348E-002) and cpp (9.3673937665039383E-002) differ by less than 4E-4 (1.5721963886328183e-07) +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432774915924193E-002) differ by less than 4E-4 (1.5721963908532643e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -320,23 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' -DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09155 [9.1552598384447972E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3258s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2983s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0272s for 90112 events => throughput is 3.31E+06 events/s + [XSECTION] Cross section = 0.09171 [9.1711089453554426E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 0.6769s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6517s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0251s for 81920 events => throughput is 3.27E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (9.1552612798370570E-002) and cpp (9.1552598384447972E-002) differ by less than 4E-4 (1.5743868098105906e-07) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711089453554426E-002) differ by less than 4E-4 (1.5762502958427405e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.502247e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.494973e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.548669e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.587728e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -372,16 +372,16 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09367 [9.3673937665039383E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1707s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1680s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0024s for 8192 events => throughput is 3.36E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [XSECTION] Cross section = 0.09243 [9.2432774915924193E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.2346s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2319s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.20E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3673952392424348E-002) and cpp (9.3673937665039383E-002) differ by less than 4E-4 (1.5721963886328183e-07) +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432774915924193E-002) differ by less than 4E-4 (1.5721963908532643e-07) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -400,23 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
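(Illustrative aside: a rough SIMD speedup can be read off the ME timings, for example with the single-precision avx2 numbers above. The ratio definition is illustrative, not a metric the harness prints.)

# Illustrative ME-only speedup, single precision, 8192 events.
fortran_me = 0.0077  # "Fortran MEs" time from the x1 Fortran run above
avx2_me    = 0.0026  # "CudaCpp MEs" time from the avx2 x1 run above
print(f"avx2/float ME speedup ~ {fortran_me / avx2_me:.1f}x")  # ~3.0x
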
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' -DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09155 [9.1552598384447972E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3196s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2931s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0262s for 90112 events => throughput is 3.44E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [XSECTION] Cross section = 0.09171 [9.1711089453554426E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 0.6790s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6545s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0243s for 81920 events => throughput is 3.37E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1552612798370570E-002) and cpp (9.1552598384447972E-002) differ by less than 4E-4 (1.5743868098105906e-07) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711089453554426E-002) differ by less than 4E-4 (1.5762502958427405e-07) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.450165e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.557503e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.604912e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.521678e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -452,16 +452,16 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09367 [9.3673941354609866E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1726s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1695s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 2.98E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [XSECTION] Cross section = 0.09243 [9.2432778556608516E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.2290s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2260s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.95E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3673952392424348E-002) and cpp (9.3673941354609866E-002) differ by less than 4E-4 (1.1783227038542066e-07) +OK! 
xsec from fortran (9.2432789448173985E-002) and cpp (9.2432778556608516E-002) differ by less than 4E-4 (1.1783227071848756e-07) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -480,23 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' -DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09155 [9.1552602074172512E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3224s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2944s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0276s for 90112 events => throughput is 3.26E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [XSECTION] Cross section = 0.09171 [9.1711093118690828E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 0.6805s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6543s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0261s for 81920 events => throughput is 3.14E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1552612798370570E-002) and cpp (9.1552602074172512E-002) differ by less than 4E-4 (1.1713699621385132e-07) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711093118690828E-002) differ by less than 4E-4 (1.1766109664357316e-07) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -505,12 +505,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.396726e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.370852e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.725203e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.638790e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -532,16 +532,16 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09367 [9.3673942834136617E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.6072s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6038s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.92E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [XSECTION] Cross section = 0.09243 [9.2432780016531851E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.6536s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6499s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.56E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3673952392424348E-002) and cuda (9.3673942834136617E-002) differ by less than 4E-4 (1.0203783962214885e-07) +OK! xsec from fortran (9.2432789448173985E-002) and cuda (9.2432780016531851E-002) differ by less than 4E-4 (1.0203783951112655e-07) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -560,23 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' -DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09155 [9.1552603722717646E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.7330s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7250s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0075s for 90112 events => throughput is 1.20E+07 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [XSECTION] Cross section = 0.09171 [9.1711094767039689E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 1.1007s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0925s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0077s for 81920 events => throughput is 1.07E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (9.1552612798370570E-002) and cuda (9.1552603722717646E-002) differ by less than 4E-4 (9.913046328247077e-08) +OK! xsec from fortran (9.1711103909519892E-002) and cuda (9.1711094767039689E-002) differ by less than 4E-4 (9.968782199720749e-08) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.411011e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.990359e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.706794e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.155049e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.799155e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.169145e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.248727e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.016803e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.819637e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.172418e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.234067e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.059728e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.364267e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.911760e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.864212e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.715158e+08 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt index 84dcc3d3fe..078c01c46b 100644 --- a/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum - -make USEBUILDDIR=1 BACKEND=cuda - +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' + make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' OMP_NUM_THREADS= -DATE: 2024-09-02_06:59:21 +DATE: 2024-09-15_12:25:03 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09367 [9.3673952392424348E-002] fbridge_mode=0 + [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 [UNWEIGHT] Wrote 3837 events (found 8192 events) - [COUNTERS] PROGRAM TOTAL : 0.6937s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6858s - [COUNTERS] Fortran MEs ( 1 ) : 0.0079s for 8192 events => throughput is 1.04E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.7519s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7443s + [COUNTERS] Fortran MEs ( 1 ) : 0.0076s for 8192 events => throughput is 1.08E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09367 [9.3673952392424348E-002] fbridge_mode=0 - [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1770s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1689s - [COUNTERS] Fortran MEs ( 1 ) : 0.0081s for 8192 events => throughput is 1.01E+06 events/s + [XSECTION] Cross section = 0.09243 [9.2432789448173985E-002] fbridge_mode=0 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.2218s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2141s + [COUNTERS] Fortran MEs ( 1 ) : 0.0077s for 8192 events => throughput is 1.07E+06 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_eemumu_x10_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09155 [9.1552612798370570E-002] fbridge_mode=0 - [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3754s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2919s - [COUNTERS] Fortran MEs ( 1 ) : 0.0835s for 90112 events => throughput is 1.08E+06 events/s + [XSECTION] Cross section = 0.09171 [9.1711103909519892E-002] 
fbridge_mode=0 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 0.7304s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6547s + [COUNTERS] Fortran MEs ( 1 ) : 0.0757s for 81920 events => throughput is 1.08E+06 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,16 +132,16 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09367 [9.3673952389194196E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1788s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1711s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0073s for 8192 events => throughput is 1.12E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 0.09243 [9.2432789444986618E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.2209s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2132s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0075s for 8192 events => throughput is 1.10E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3673952392424348E-002) and cpp (9.3673952389194196E-002) differ by less than 2E-4 (3.448297203334505e-11) +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789444986618E-002) differ by less than 2E-4 (3.448308305564751e-11) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,23 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' -DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09155 [9.1552612788773982E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3745s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2946s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0795s for 90112 events => throughput is 1.13E+06 events/s + [XSECTION] Cross section = 0.09171 [9.1711103904317928E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 0.7244s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6512s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0730s for 81920 events => throughput is 1.12E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1552612798370570E-002) and cpp (9.1552612788773982E-002) differ by less than 2E-4 (1.0482048562465707e-10) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103904317928E-002) differ by less than 2E-4 (5.6721183305796785e-11) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -185,12 +185,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.133071e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.127620e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.138322e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.149889e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -212,16 +212,16 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09367 [9.3673952389194196E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1750s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1702s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0044s for 8192 events => throughput is 1.84E+06 events/s + [XSECTION] Cross section = 0.09243 [9.2432789444986618E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.2170s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2124s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0043s for 8192 events => throughput is 1.91E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3673952392424348E-002) and cpp (9.3673952389194196E-002) differ by less than 2E-4 (3.448297203334505e-11) +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789444986618E-002) differ by less than 2E-4 (3.448308305564751e-11) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -240,23 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' -DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09155 [9.1552612788773982E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3355s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2900s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0452s for 90112 events => throughput is 1.99E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 0.09171 [9.1711103904317928E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 0.7028s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6603s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0422s for 81920 events => throughput is 1.94E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (9.1552612798370570E-002) and cpp (9.1552612788773982E-002) differ by less than 2E-4 (1.0482048562465707e-10) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103904317928E-002) differ by less than 2E-4 (5.6721183305796785e-11) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.003701e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.010964e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.055731e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.042848e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -292,16 +292,16 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09367 [9.3673952388695372E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1752s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1715s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0034s for 8192 events => throughput is 2.42E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [XSECTION] Cross section = 0.09243 [9.2432789444494415E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.2175s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2138s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0035s for 8192 events => throughput is 2.37E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3673952392424348E-002) and cpp (9.3673952388695372E-002) differ by less than 2E-4 (3.980804574865715e-11) +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789444494415E-002) differ by less than 2E-4 (3.980804574865715e-11) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -320,23 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' -DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09155 [9.1552612782299689E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3299s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2945s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0350s for 90112 events => throughput is 2.57E+06 events/s + [XSECTION] Cross section = 0.09171 [9.1711103899063451E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 0.6838s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6506s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0330s for 81920 events => throughput is 2.49E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1552612798370570E-002) and cpp (9.1552612782299689E-002) differ by less than 2E-4 (1.7553714037887858e-10) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103899063451E-002) differ by less than 2E-4 (1.1401501964769523e-10) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.523311e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.582216e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.683822e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.715007e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -372,16 +372,16 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09367 [9.3673952388695372E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1728s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1694s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.64E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 0.09243 [9.2432789444494415E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.2166s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2132s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.62E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3673952392424348E-002) and cpp (9.3673952388695372E-002) differ by less than 2E-4 (3.980804574865715e-11) +OK! 
xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789444494415E-002) differ by less than 2E-4 (3.980804574865715e-11) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -400,23 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' -DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09155 [9.1552612782299689E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3292s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2947s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0342s for 90112 events => throughput is 2.63E+06 events/s + [XSECTION] Cross section = 0.09171 [9.1711103899063451E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 0.6865s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6535s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0327s for 81920 events => throughput is 2.51E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1552612798370570E-002) and cpp (9.1552612782299689E-002) differ by less than 2E-4 (1.7553714037887858e-10) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103899063451E-002) differ by less than 2E-4 (1.1401501964769523e-10) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -425,12 +425,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.635646e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.595546e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.742968e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.748668e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -452,16 +452,16 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09367 [9.3673952388695372E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.1748s - [COUNTERS] Fortran Overhead ( 0 ) : 0.1708s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0036s for 8192 events => throughput is 2.27E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 0.09243 [9.2432789444494415E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.2172s + [COUNTERS] Fortran Overhead ( 0 ) : 0.2131s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0039s for 8192 events => throughput is 2.12E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3673952392424348E-002) and cpp (9.3673952388695372E-002) differ by less than 2E-4 (3.980804574865715e-11) +OK! xsec from fortran (9.2432789448173985E-002) and cpp (9.2432789444494415E-002) differ by less than 2E-4 (3.980804574865715e-11) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -480,23 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' -DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09155 [9.1552612782299689E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.3470s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3038s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0428s for 90112 events => throughput is 2.11E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 0.09171 [9.1711103899063451E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 0.6870s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6485s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0382s for 81920 events => throughput is 2.14E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (9.1552612798370570E-002) and cpp (9.1552612782299689E-002) differ by less than 2E-4 (1.7553714037887858e-10) +OK! xsec from fortran (9.1711103909519892E-002) and cpp (9.1711103899063451E-002) differ by less than 2E-4 (1.1401501964769523e-10) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.151910e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.209077e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.357675e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.339473e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -532,16 +532,16 @@ DEBUG: MEK processed 8192 events across 2 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09367 [9.3673952381938416E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1566 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 0.6042s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6007s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.89E+06 events/s + [XSECTION] Cross section = 0.09243 [9.2432789437826970E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1589 events (found 1593 events) + [COUNTERS] PROGRAM TOTAL : 0.6512s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6475s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.59E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.3673952392424348E-002) and cuda (9.3673952381938416E-002) differ by less than 2E-4 (1.1194078997078805e-10) +OK! xsec from fortran (9.2432789448173985E-002) and cuda (9.2432789437826970E-002) differ by less than 2E-4 (1.1194101201539297e-10) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -560,23 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_eemumu_x10_cudacpp > /tmp/avalassi/output_eemumu_x10_cudacpp' -DEBUG: MEK processed 90112 events across 2 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 2 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.09155 [9.1552612789338281E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1803 events (found 1808 events) - [COUNTERS] PROGRAM TOTAL : 0.7482s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7397s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0078s for 90112 events => throughput is 1.15E+07 events/s + [XSECTION] Cross section = 0.09171 [9.1711103901050417E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 1655 events (found 1660 events) + [COUNTERS] PROGRAM TOTAL : 1.0962s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0880s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0077s for 81920 events => throughput is 1.06E+07 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (9.1552612798370570E-002) and cuda (9.1552612789338281E-002) differ by less than 2E-4 (9.865686045884559e-11) +OK! xsec from fortran (9.1711103909519892E-002) and cuda (9.1711103901050417E-002) differ by less than 2E-4 (9.234946141134515e-11) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.329474e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.884543e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.842901e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.177546e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.560810e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.231479e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.923949e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.854991e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.533688e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.235273e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.908979e+08 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 1.864709e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.568820e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.202609e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.185626e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.123678e+08 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index e5be51b21f..96c9821e5a 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx - +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-09-03_17:32:05 +DATE: 2024-09-15_12:25:22 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 2613 events (found 5374 events) - [COUNTERS] PROGRAM TOTAL : 0.7988s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7566s - [COUNTERS] Fortran MEs ( 1 ) : 0.0422s for 8192 events => throughput is 1.94E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8743s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8316s + [COUNTERS] Fortran MEs ( 1 ) : 0.0427s for 8192 events => throughput is 1.92E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4095s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3674s - [COUNTERS] Fortran MEs ( 1 ) : 0.0420s for 8192 events => throughput is 1.95E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4675s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4254s + [COUNTERS] Fortran MEs ( 1 ) : 0.0421s for 8192 events => throughput is 1.95E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -108,9 +108,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] ChannelId = 1 [XSECTION] Cross section = 47.14 [47.144596232268185] fbridge_mode=0 [UNWEIGHT] Wrote 1613 events (found 1618 events) - [COUNTERS] PROGRAM TOTAL : 1.5675s - [COUNTERS] Fortran Overhead ( 0 ) : 1.1480s - [COUNTERS] Fortran MEs ( 1 ) : 0.4195s for 81920 events => throughput is 1.95E+05 events/s + 
[COUNTERS] PROGRAM TOTAL : 2.0472s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6157s + [COUNTERS] Fortran MEs ( 1 ) : 0.4316s for 81920 events => throughput is 1.90E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,13 +132,496 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.19 [47.190399993877946] fbridge_mode=1 + [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=1 [UNWEIGHT] Wrote 1618 events (found 1623 events) - [COUNTERS] PROGRAM TOTAL : 0.4162s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3699s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0458s for 8192 events => throughput is 1.79E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.4948s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4483s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0461s for 8192 events => throughput is 1.78E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -ERROR! xsec from fortran (47.138611968034162) and cpp (47.190399993877946) differ by more than 3E-14 (0.0010986328125) +OK! xsec from fortran (47.138611968034162) and cpp (47.138611968034162) differ by less than 3E-14 (0.0) + +*** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-none) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 2.0844s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6223s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4617s for 81920 events => throughput is 1.77E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.819761e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 1.831048e+05 ) sec^-1 + +*** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=1 + [UNWEIGHT] Wrote 1618 events (found 1623 events) + [COUNTERS] PROGRAM TOTAL : 0.4726s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4464s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0258s for 8192 events => throughput is 3.18E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.138611968034162) and cpp (47.138611968034162) differ by less than 3E-14 (0.0) + +*** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-sse4) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 1.8861s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6258s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2599s for 81920 events => throughput is 3.15E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.274415e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.309936e+05 ) sec^-1 + +*** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=1 + [UNWEIGHT] Wrote 1618 events (found 1623 events) + [COUNTERS] PROGRAM TOTAL : 0.4481s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4320s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0158s for 8192 events => throughput is 5.17E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.138611968034162) and cpp (47.138611968034162) differ by less than 3E-14 (0.0) + +*** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-avx2) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 1.7336s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5761s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1571s for 81920 events => throughput is 5.21E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.284458e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.379827e+05 ) sec^-1 + +*** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=1 + [UNWEIGHT] Wrote 1618 events (found 1623 events) + [COUNTERS] PROGRAM TOTAL : 0.4531s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4383s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0144s for 8192 events => throughput is 5.67E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.138611968034162) and cpp (47.138611968034162) differ by less than 3E-14 (0.0) + +*** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512y) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 1.7254s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5812s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1439s for 81920 events => throughput is 5.69E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.785167e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 5.877514e+05 ) sec^-1 + +*** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.138611968034169] fbridge_mode=1 + [UNWEIGHT] Wrote 1618 events (found 1623 events) + [COUNTERS] PROGRAM TOTAL : 0.4571s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4340s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 8192 events => throughput is 3.60E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + +*** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.138611968034162) and cpp (47.138611968034169) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.1 and events.lhe.ref.1 are identical + +*** (2-512z) EXECUTE MADEVENT_CPP x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
+-------------------- +Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.144596232268192] fbridge_mode=1 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 1.8048s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5784s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2261s for 81920 events => throughput is 3.62E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + +*** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.144596232268185) and cpp (47.144596232268192) differ by less than 3E-14 (2.220446049250313e-16) + +*** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical + +*** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.669033e+05 ) sec^-1 + +*** EXECUTE CHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.709076e+05 ) sec^-1 + +*** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +8192 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x1_cudacpp > /tmp/avalassi/output_ggtt_x1_cudacpp' +DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.138611968034176] fbridge_mode=1 + [UNWEIGHT] Wrote 1618 events (found 1623 events) + [COUNTERS] PROGRAM TOTAL : 0.8678s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8638s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.58E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + +*** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.138611968034162) and cuda (47.138611968034176) differ by less than 3E-14 (2.220446049250313e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.1 and events.lhe.ref.1 are identical + +*** (3-cuda) EXECUTE MADEVENT_CUDA x10 (create events.lhe) *** +-------------------- +CUDACPP_RUNTIME_FBRIDGEMODE = (not set) +CUDACPP_RUNTIME_VECSIZEUSED = 8192 +-------------------- +81920 1 1 ! Number of events and max and min iterations +0.000001 ! Accuracy (ignored because max iterations = min iterations) +0 ! Grid Adjustment 0=none, 2=adjust (NB if = 0, ftn26 will still be used if present) +1 ! Suppress Amplitude 1=yes (i.e. use MadEvent single-diagram enhancement) +0 ! Helicity Sum/event 0=exact +1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) +-------------------- +Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } + [OPENMPTH] omp_get_max_threads/nproc = 1/4 + [NGOODHEL] ngoodhel/ncomb = 16/16 + [XSECTION] VECSIZE_USED = 8192 + [XSECTION] MultiChannel = TRUE + [XSECTION] Configuration = 1 + [XSECTION] ChannelId = 1 + [XSECTION] Cross section = 47.14 [47.144596232268178] fbridge_mode=1 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 2.0186s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0088s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0090s for 81920 events => throughput is 9.08E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + +*** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** + +OK! xsec from fortran (47.144596232268185) and cuda (47.144596232268178) differ by less than 3E-14 (1.1102230246251565e-16) + +*** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** + +OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical + +*** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.691011e+06 ) sec^-1 + +*** EXECUTE GCHECK(8192) -p 256 32 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 3.112364e+06 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.690097e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX) -p 16384 32 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.580203e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.646974e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 7.911031e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.660586e+07 ) sec^-1 + +*** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** +Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] +Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK +EvtsPerSec[MECalcOnly] (3a) = ( 2.671503e+07 ) sec^-1 + +*** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** + +TEST COMPLETED diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index a10a26ba3e..332313b063 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx + make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone - - make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
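[Editor's sketch] The interleaved make lines that follow come from one build per backend, each in its own USEBUILDDIR=1 build directory (build.none_*, build.sse4_*, build.512y_*, etc.). A minimal sketch of that build matrix, run sequentially here whereas the log shows the builds interleaved in parallel; the actual tmad driver script is not reproduced:

# Sketch of the build matrix visible in the make output below.
import subprocess

for backend in ["cuda", "cppnone", "cppsse4", "cppavx2", "cpp512y", "cpp512z"]:
    subprocess.run(["make", "USEBUILDDIR=1", f"BACKEND={backend}"], check=True)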
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-09-02_07:00:03 +DATE: 2024-09-15_12:25:51 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.19 [47.190399993877946] fbridge_mode=0 + [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 2613 events (found 5374 events) - [COUNTERS] PROGRAM TOTAL : 0.7848s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7437s - [COUNTERS] Fortran MEs ( 1 ) : 0.0411s for 8192 events => throughput is 1.99E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8572s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8148s + [COUNTERS] Fortran MEs ( 1 ) : 0.0424s for 8192 events => throughput is 1.93E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.19 [47.190399993877946] fbridge_mode=0 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.4070s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3656s - [COUNTERS] Fortran MEs ( 1 ) : 0.0413s for 8192 events => throughput is 1.98E+05 events/s + [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 + [UNWEIGHT] Wrote 1618 events (found 1623 events) + [COUNTERS] PROGRAM TOTAL : 0.4702s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4279s + [COUNTERS] Fortran MEs ( 1 ) : 0.0423s for 8192 events => throughput is 1.94E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.16 [47.156031786487532] fbridge_mode=0 - [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.6931s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2377s - [COUNTERS] Fortran MEs ( 1 ) : 0.4554s for 90112 events => throughput is 1.98E+05 events/s + [XSECTION] Cross section = 47.14 [47.144596232268185] fbridge_mode=0 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 1.9993s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5780s + [COUNTERS] Fortran MEs ( 1 ) : 0.4213s for 81920 events => throughput is 1.94E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,16 +132,16 @@ DEBUG: MEK processed 8192 
events across 3 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.19 [47.190394119386738] fbridge_mode=1 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.4079s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3662s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0413s for 8192 events => throughput is 1.98E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 47.14 [47.138606099989779] fbridge_mode=1 + [UNWEIGHT] Wrote 1618 events (found 1623 events) + [COUNTERS] PROGRAM TOTAL : 0.4661s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4236s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0422s for 8192 events => throughput is 1.94E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.190399993877946) and cpp (47.190394119386738) differ by less than 4E-4 (1.2448487851646206e-07) +OK! xsec from fortran (47.138611968034162) and cpp (47.138606099989779) differ by less than 4E-4 (1.2448487851646206e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,23 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.16 [47.156028163566589] fbridge_mode=1 - [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.7285s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2748s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4533s for 90112 events => throughput is 1.99E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 47.14 [47.144592707001024] fbridge_mode=1 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 2.0011s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5800s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4208s for 81920 events => throughput is 1.95E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.156031786487532) and cpp (47.156028163566589) differ by less than 4E-4 (7.682836755673605e-08) +OK! xsec from fortran (47.144596232268185) and cpp (47.144592707001024) differ by less than 4E-4 (7.477563590541081e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -185,12 +185,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.999199e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.965272e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.978384e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.960936e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -212,16 +212,16 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.19 [47.190390126085290] fbridge_mode=1 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3790s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3617s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0171s for 8192 events => throughput is 4.80E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [XSECTION] Cross section = 47.14 [47.138602111070696] fbridge_mode=1 + [UNWEIGHT] Wrote 1618 events (found 1623 events) + [COUNTERS] PROGRAM TOTAL : 0.4391s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4215s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0173s for 8192 events => throughput is 4.73E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.190399993877946) and cpp (47.190390126085290) differ by less than 4E-4 (2.091059337905321e-07) +OK! xsec from fortran (47.138611968034162) and cpp (47.138602111070696) differ by less than 4E-4 (2.091059336795098e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -240,23 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.16 [47.156024287692041] fbridge_mode=1 - [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.4751s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2892s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1856s for 90112 events => throughput is 4.86E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [XSECTION] Cross section = 47.14 [47.144588828412729] fbridge_mode=1 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 1.7527s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5796s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1729s for 81920 events => throughput is 4.74E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.156031786487532) and cpp (47.156024287692041) differ by less than 4E-4 (1.5902091854425038e-07) +OK! 
xsec from fortran (47.144596232268185) and cpp (47.144588828412729) differ by less than 4E-4 (1.570456860111591e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.722011e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.736938e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.787563e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.756905e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -292,16 +292,16 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.19 [47.190390514620915] fbridge_mode=1 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3761s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3668s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0090s for 8192 events => throughput is 9.11E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [XSECTION] Cross section = 47.14 [47.138602499179925] fbridge_mode=1 + [UNWEIGHT] Wrote 1618 events (found 1623 events) + [COUNTERS] PROGRAM TOTAL : 0.4356s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4246s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0108s for 8192 events => throughput is 7.59E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.190399993877946) and cpp (47.190390514620915) differ by less than 4E-4 (2.008725722424387e-07) +OK! xsec from fortran (47.138611968034162) and cpp (47.138602499179925) differ by less than 4E-4 (2.008725722424387e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -320,23 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
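[Editor's sketch] The number quoted in parentheses after each "differ by less than" check is consistent with the relative difference abs(xsec_new/xsec_ref - 1) evaluated in double precision; this reproduces both the ~E-07 deltas here and the one-ULP deltas (1.11E-16, 2.22E-16) in the double-precision log earlier. A sketch with a hypothetical helper name:

# Sketch (formula inferred from the log figures, not quoted from the driver):
# the printed delta matches abs(xsec_new / xsec_ref - 1.0), e.g. the
# (2-sse4) x1 check above yields 2.091059336795098e-07 < 4E-4.
def xsec_ok(xsec_ref, xsec_new, tol):
    delta = abs(xsec_new / xsec_ref - 1.0)
    return delta < tol, delta

print(xsec_ok(47.138611968034162, 47.138602111070696, 4e-4))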
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.16 [47.156022338885265] fbridge_mode=1 - [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.3789s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2802s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0984s for 90112 events => throughput is 9.16E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [XSECTION] Cross section = 47.14 [47.144586996341530] fbridge_mode=1 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 1.6648s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5733s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0913s for 81920 events => throughput is 8.98E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.156031786487532) and cpp (47.156022338885265) differ by less than 4E-4 (2.0034769487864423e-07) +OK! xsec from fortran (47.144596232268185) and cpp (47.144586996341530) differ by less than 4E-4 (1.9590636879396328e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.087831e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.164017e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.255190e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.232442e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -372,16 +372,16 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.19 [47.190390514620915] fbridge_mode=1 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3712s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3621s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0088s for 8192 events => throughput is 9.32E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [XSECTION] Cross section = 47.14 [47.138602499179925] fbridge_mode=1 + [UNWEIGHT] Wrote 1618 events (found 1623 events) + [COUNTERS] PROGRAM TOTAL : 0.4302s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4213s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0087s for 8192 events => throughput is 9.47E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.190399993877946) and cpp (47.190390514620915) differ by less than 4E-4 (2.008725722424387e-07) +OK! 
xsec from fortran (47.138611968034162) and cpp (47.138602499179925) differ by less than 4E-4 (2.008725722424387e-07) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -400,23 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.16 [47.156022338885265] fbridge_mode=1 - [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.3663s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2738s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0922s for 90112 events => throughput is 9.77E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [XSECTION] Cross section = 47.14 [47.144586996341530] fbridge_mode=1 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 1.6635s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5765s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0868s for 81920 events => throughput is 9.44E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.156031786487532) and cpp (47.156022338885265) differ by less than 4E-4 (2.0034769487864423e-07) +OK! xsec from fortran (47.144596232268185) and cpp (47.144586996341530) differ by less than 4E-4 (1.9590636879396328e-07) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -425,12 +425,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.782240e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.838171e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.861091e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.798371e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -452,16 +452,16 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.19 [47.190394861161103] fbridge_mode=1 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3741s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3621s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0116s for 8192 events => throughput is 7.08E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 47.14 [47.138606840950104] fbridge_mode=1 + [UNWEIGHT] Wrote 1618 events (found 1623 events) + [COUNTERS] PROGRAM TOTAL : 0.4367s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4239s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0125s for 8192 events => throughput is 6.54E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.190399993877946) and cpp (47.190394861161103) differ by less than 4E-4 (1.0876612288601706e-07) +OK! xsec from fortran (47.138611968034162) and cpp (47.138606840950104) differ by less than 4E-4 (1.0876612277499476e-07) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -480,23 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.16 [47.156026777408194] fbridge_mode=1 - [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.4135s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2849s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1282s for 90112 events => throughput is 7.03E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 47.14 [47.144591429357156] fbridge_mode=1 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 1.7012s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5808s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1202s for 81920 events => throughput is 6.82E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.156031786487532) and cpp (47.156026777408194) differ by less than 4E-4 (1.0622351265254792e-07) +OK! 
xsec from fortran (47.144596232268185) and cpp (47.144591429357156) differ by less than 4E-4 (1.0187617272006122e-07) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.828517e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.784830e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.977153e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.151143e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -532,16 +532,16 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.19 [47.190400428492907] fbridge_mode=1 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.8297s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8261s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.75E+06 events/s + [XSECTION] Cross section = 47.14 [47.138612402172164] fbridge_mode=1 + [UNWEIGHT] Wrote 1618 events (found 1623 events) + [COUNTERS] PROGRAM TOTAL : 0.8665s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8626s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.54E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.190399993877946) and cuda (47.190400428492907) differ by less than 4E-4 (9.209817353195149e-09) +OK! xsec from fortran (47.138611968034162) and cuda (47.138612402172164) differ by less than 4E-4 (9.209817353195149e-09) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -560,23 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
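[Editor's sketch] The throughput figure in each [COUNTERS] line is the event count divided by the matrix-element-only time, e.g. 81920 events in 0.1202s gives roughly 6.82E+05 events/s for the 512z run above; as a one-line sketch:

# Sketch: throughput as printed in the [COUNTERS] lines, events per second
# over the ME-only timer (e.g. 81920 / 0.1202 ~= 6.82E+05 for 512z above).
def throughput(nevents, me_seconds):
    return nevents / me_seconds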
-------------------- Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.16 [47.156031802494475] fbridge_mode=1 - [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.7211s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7122s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 90112 events => throughput is 1.10E+07 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [XSECTION] Cross section = 47.14 [47.144596666727985] fbridge_mode=1 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 2.0261s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0173s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 81920 events => throughput is 9.98E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.156031786487532) and cuda (47.156031802494475) differ by less than 4E-4 (3.394462488870431e-10) +OK! xsec from fortran (47.144596232268185) and cuda (47.144596666727985) differ by less than 4E-4 (9.215473939505614e-09) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.368064e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.885710e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.622906e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.367920e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.123333e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.566831e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.384526e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.323685e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.086847e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.551074e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.396981e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
1.318066e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.764264e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.342254e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.103142e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.899112e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index 7510df5e12..690f140a41 100644 --- a/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-09-02_07:00:28 +DATE: 2024-09-15_12:26:19 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.19 [47.190399993877946] fbridge_mode=0 + [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 [UNWEIGHT] Wrote 2613 events (found 5374 events) - [COUNTERS] PROGRAM TOTAL : 0.7873s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7456s - [COUNTERS] Fortran MEs ( 1 ) : 0.0417s for 8192 events => throughput is 1.96E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8603s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8180s + [COUNTERS] Fortran MEs ( 1 ) : 0.0423s for 8192 events => throughput is 1.93E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x1_fortran > /tmp/aval [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.19 [47.190399993877946] fbridge_mode=0 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.4126s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3694s - [COUNTERS] Fortran MEs ( 1 ) : 0.0431s for 8192 events => throughput is 1.90E+05 events/s + [XSECTION] Cross section = 47.14 [47.138611968034162] fbridge_mode=0 + [UNWEIGHT] Wrote 1618 events (found 1623 events) + [COUNTERS] PROGRAM TOTAL : 0.4773s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4333s + [COUNTERS] Fortran MEs ( 1 ) : 0.0440s for 8192 events => throughput is 1.86E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggtt_x10_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.16 [47.156031786487532] fbridge_mode=0 - [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.7506s - [COUNTERS] Fortran 
Overhead ( 0 ) : 1.2796s - [COUNTERS] Fortran MEs ( 1 ) : 0.4710s for 90112 events => throughput is 1.91E+05 events/s + [XSECTION] Cross section = 47.14 [47.144596232268185] fbridge_mode=0 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 1.9957s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5742s + [COUNTERS] Fortran MEs ( 1 ) : 0.4215s for 81920 events => throughput is 1.94E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,16 +132,16 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.19 [47.190401334262738] fbridge_mode=1 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.4088s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3623s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0460s for 8192 events => throughput is 1.78E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 47.14 [47.138613306947967] fbridge_mode=1 + [UNWEIGHT] Wrote 1618 events (found 1623 events) + [COUNTERS] PROGRAM TOTAL : 0.4699s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4242s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0453s for 8192 events => throughput is 1.81E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.190399993877946) and cpp (47.190401334262738) differ by less than 2E-4 (2.8403760010675683e-08) +OK! xsec from fortran (47.138611968034162) and cpp (47.138613306947967) differ by less than 2E-4 (2.8403759566586473e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,23 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.16 [47.156033127698386] fbridge_mode=1 - [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.7707s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2834s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4869s for 90112 events => throughput is 1.85E+05 events/s + [XSECTION] Cross section = 47.14 [47.144597573367548] fbridge_mode=1 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 2.0415s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5863s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4548s for 81920 events => throughput is 1.80E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.156031786487532) and cpp (47.156033127698386) differ by less than 2E-4 (2.844197877216459e-08) +OK! xsec from fortran (47.144596232268185) and cpp (47.144597573367548) differ by less than 2E-4 (2.8446512922997158e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -185,12 +185,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.857679e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.842164e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.861040e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.851851e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -212,16 +212,16 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.19 [47.190401334262724] fbridge_mode=1 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3905s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3659s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0242s for 8192 events => throughput is 3.39E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 47.14 [47.138613306947953] fbridge_mode=1 + [UNWEIGHT] Wrote 1618 events (found 1623 events) + [COUNTERS] PROGRAM TOTAL : 0.4503s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4251s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0249s for 8192 events => throughput is 3.30E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.190399993877946) and cpp (47.190401334262724) differ by less than 2E-4 (2.8403759566586473e-08) +OK! xsec from fortran (47.138611968034162) and cpp (47.138613306947953) differ by less than 2E-4 (2.8403759344541868e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -240,23 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.16 [47.156033127698386] fbridge_mode=1 - [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.5519s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2846s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2669s for 90112 events => throughput is 3.38E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 47.14 [47.144597573367555] fbridge_mode=1 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 1.8185s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5698s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2483s for 81920 events => throughput is 3.30E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.156031786487532) and cpp (47.156033127698386) differ by less than 2E-4 (2.844197877216459e-08) +OK! 
xsec from fortran (47.144596232268185) and cpp (47.144597573367555) differ by less than 2E-4 (2.8446512922997158e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.370119e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.330961e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.431477e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.355752e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -292,16 +292,16 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.19 [47.190401377780539] fbridge_mode=1 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3801s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3646s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0151s for 8192 events => throughput is 5.43E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 47.14 [47.138613350418019] fbridge_mode=1 + [UNWEIGHT] Wrote 1618 events (found 1623 events) + [COUNTERS] PROGRAM TOTAL : 0.4396s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4241s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0152s for 8192 events => throughput is 5.39E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.190399993877946) and cpp (47.190401377780539) differ by less than 2E-4 (2.932593479165746e-08) +OK! xsec from fortran (47.138611968034162) and cpp (47.138613350418019) differ by less than 2E-4 (2.932593479165746e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -320,23 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
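[Editor's sketch] In the CHECK/GCHECK lines, '-p <blocks> <threads> <iterations>' fixes the launch grid, so the event count per iteration is blocks times threads; that product is what the parenthesized tags encode:

# Sketch: events per CHECK/GCHECK iteration from the '-p' grid arguments.
#   GCHECK(8192)      -p 256 32 1   -> 256 * 32   = 8192
#   GCHECK(MAX)       -p 16384 32 1 -> 16384 * 32 = 524288
#   GCHECK(MAX128THR) -p 4096 128 1 -> 4096 * 128 = 524288
#   GCHECK(MAX8THR)   -p 65536 8 1  -> 65536 * 8  = 524288
def events_per_iteration(blocks, threads):
    return blocks * threads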
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.16 [47.156033163835552] fbridge_mode=1 - [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.4492s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2811s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1676s for 90112 events => throughput is 5.38E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 47.14 [47.144597608209963] fbridge_mode=1 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 1.7262s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5697s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1562s for 81920 events => throughput is 5.24E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.156031786487532) and cpp (47.156033163835552) differ by less than 2E-4 (2.9208310436956708e-08) +OK! xsec from fortran (47.144596232268185) and cpp (47.144597608209963) differ by less than 2E-4 (2.9185567074208052e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.368052e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.311307e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.349021e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.353756e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -372,16 +372,16 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.19 [47.190401377780539] fbridge_mode=1 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3795s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3651s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0140s for 8192 events => throughput is 5.85E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 47.14 [47.138613350418019] fbridge_mode=1 + [UNWEIGHT] Wrote 1618 events (found 1623 events) + [COUNTERS] PROGRAM TOTAL : 0.4386s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4240s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0143s for 8192 events => throughput is 5.74E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.190399993877946) and cpp (47.190401377780539) differ by less than 2E-4 (2.932593479165746e-08) +OK! 
xsec from fortran (47.138611968034162) and cpp (47.138613350418019) differ by less than 2E-4 (2.932593479165746e-08) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -400,23 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.16 [47.156033163835552] fbridge_mode=1 - [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.4304s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2782s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1517s for 90112 events => throughput is 5.94E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 47.14 [47.144597608209963] fbridge_mode=1 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 1.7199s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5781s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1415s for 81920 events => throughput is 5.79E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.156031786487532) and cpp (47.156033163835552) differ by less than 2E-4 (2.9208310436956708e-08) +OK! xsec from fortran (47.144596232268185) and cpp (47.144597608209963) differ by less than 2E-4 (2.9185567074208052e-08) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -425,12 +425,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.945633e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.855269e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.030396e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.000340e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -452,16 +452,16 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.19 [47.190401377780539] fbridge_mode=1 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3879s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3664s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0211s for 8192 events => throughput is 3.89E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 47.14 [47.138613350418019] fbridge_mode=1 + [UNWEIGHT] Wrote 1618 events (found 1623 events) + [COUNTERS] PROGRAM TOTAL : 0.4473s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4249s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0220s for 8192 events => throughput is 3.72E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.190399993877946) and cpp (47.190401377780539) differ by less than 2E-4 (2.932593479165746e-08) +OK! xsec from fortran (47.138611968034162) and cpp (47.138613350418019) differ by less than 2E-4 (2.932593479165746e-08) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -480,23 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.16 [47.156033163835552] fbridge_mode=1 - [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.5162s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2791s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2367s for 90112 events => throughput is 3.81E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 47.14 [47.144597608209963] fbridge_mode=1 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 1.8042s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5822s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2217s for 81920 events => throughput is 3.69E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.156031786487532) and cpp (47.156033163835552) differ by less than 2E-4 (2.9208310436956708e-08) +OK! 
xsec from fortran (47.144596232268185) and cpp (47.144597608209963) differ by less than 2E-4 (2.9185567074208052e-08) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.720031e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.705483e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.841945e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.752040e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -532,16 +532,16 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.19 [47.190399989386655] fbridge_mode=1 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.8041s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8003s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.78E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [XSECTION] Cross section = 47.14 [47.138611963547788] fbridge_mode=1 + [UNWEIGHT] Wrote 1618 events (found 1623 events) + [COUNTERS] PROGRAM TOTAL : 0.8690s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8650s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.54E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.190399993877946) and cuda (47.190399989386655) differ by less than 2E-4 (9.517386878599154e-11) +OK! xsec from fortran (47.138611968034162) and cuda (47.138611963547788) differ by less than 2E-4 (9.517409083059647e-11) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -560,23 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
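Each [COUNTERS] line in these runs reports the ME kernel time and derives the throughput as events divided by seconds, where the x10 runs process 81920 events, i.e. ten iterations of the 8192-event vector (CUDACPP_RUNTIME_VECSIZEUSED = 8192). A small sketch reproducing the 512z x10 numbers just above (0.2217s for 81920 events); the variable names are mine, and note that the log's 3.69E+05 is computed from the unrounded timer, so recomputing from the rounded 0.2217s lands one notch higher:

  #include <cstdio>

  // Sketch: reproduce a [COUNTERS] throughput line from its printed timer.
  int main()
  {
    const int nevt = 81920;      // events in the x10 run (10 x 8192)
    const double tMEs = 0.2217;  // CudaCpp MEs time printed above (512z x10)
    std::printf( "throughput is %.2E events/s\n", nevt / tMEs ); // 3.70E+05
    return 0;
  }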
-------------------- Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggtt_x10_cudacpp > /tmp/avalassi/output_ggtt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 47.16 [47.156031785698381] fbridge_mode=1 - [UNWEIGHT] Wrote 1766 events (found 1771 events) - [COUNTERS] PROGRAM TOTAL : 1.7240s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7140s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0092s for 90112 events => throughput is 9.81E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [XSECTION] Cross section = 47.14 [47.144596232269095] fbridge_mode=1 + [UNWEIGHT] Wrote 1613 events (found 1618 events) + [COUNTERS] PROGRAM TOTAL : 2.0284s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0185s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0091s for 81920 events => throughput is 8.96E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (47.156031786487532) and cuda (47.156031785698381) differ by less than 2E-4 (1.6734835739384835e-11) +OK! xsec from fortran (47.144596232268185) and cuda (47.144596232269095) differ by less than 2E-4 (1.9317880628477724e-14) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.108490e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.739810e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.608485e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.024140e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.892798e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.672565e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.676934e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.446976e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.929262e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.658819e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.040199e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 
7.897824e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.901697e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.677542e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.716849e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.722978e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index ab28660dbe..b1e24854d0 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg - +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-09-02_07:00:54 +DATE: 2024-09-15_12:26:48 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1056 [0.10562806079395722] fbridge_mode=0 + [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 [UNWEIGHT] Wrote 387 events (found 1591 events) - [COUNTERS] PROGRAM TOTAL : 0.6949s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3669s - [COUNTERS] Fortran MEs ( 1 ) : 0.3280s for 8192 events => throughput is 2.50E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7657s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4327s + [COUNTERS] Fortran MEs ( 1 ) : 0.3330s for 8192 events => throughput is 2.46E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1056 [0.10562806079395722] fbridge_mode=0 - [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.6474s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3210s - [COUNTERS] Fortran MEs ( 1 ) : 0.3264s for 8192 events => throughput is 2.51E+04 events/s + [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.7371s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4051s + [COUNTERS] Fortran MEs ( 1 ) : 0.3320s for 8192 events => throughput is 2.47E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07995 [7.9946447903217255E-002] fbridge_mode=0 - [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM 
TOTAL : 5.1361s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5293s - [COUNTERS] Fortran MEs ( 1 ) : 3.6069s for 90112 events => throughput is 2.50E+04 events/s + [XSECTION] Cross section = 0.07997 [7.9971656827279608E-002] fbridge_mode=0 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 5.2429s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9222s + [COUNTERS] Fortran MEs ( 1 ) : 3.3207s for 81920 events => throughput is 2.47E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,16 +132,16 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1056 [0.10562806079395722] fbridge_mode=1 - [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.6604s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3209s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3383s for 8192 events => throughput is 2.42E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s + [XSECTION] Cross section = 0.07847 [7.8471485809748553E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.7536s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4055s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3470s for 8192 events => throughput is 2.36E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10562806079395722) and cpp (0.10562806079395722) differ by less than 3E-14 (0.0) +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471485809748553E-002) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,23 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07995 [7.9946447903217255E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 5.2721s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5362s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.7347s for 90112 events => throughput is 2.41E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s + [XSECTION] Cross section = 0.07997 [7.9971656827279622E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 5.3883s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9232s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.4640s for 81920 events => throughput is 2.36E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9946447903217255E-002) and cpp (7.9946447903217255E-002) differ by less than 3E-14 (0.0) +OK! 
xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279622E-002) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.488662e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.429969e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.503105e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.453551e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -212,16 +212,16 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1056 [0.10562806079395720] fbridge_mode=1 - [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.5003s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3222s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1773s for 8192 events => throughput is 4.62E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.5849s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4022s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1821s for 8192 events => throughput is 4.50E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10562806079395722) and cpp (0.10562806079395720) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471485809748567E-002) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -240,23 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
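For these double-precision ('d') builds the tolerance tightens to 3E-14, and the deltas actually printed (0.0, 2.220446049250313e-16, 4.440892098500626e-16) are exact multiples of DBL_EPSILON; in other words, Fortran and cudacpp agree to the last bit or two of a double. A sketch checking that for the (2-none) x1 pair above, with the values copied from this log:

  #include <cfloat>
  #include <cmath>
  #include <cstdio>

  int main()
  {
    const double xsecFortran = 7.8471485809748567E-002; // fortran, x1 run above
    const double xsecCpp     = 7.8471485809748553E-002; // cppnone, one ULP lower
    const double delta = std::fabs( 1.0 - xsecCpp / xsecFortran );
    // prints delta = 2.220446049250313e-16 = 1 x DBL_EPSILON
    std::printf( "delta = %.16g = %.0f x DBL_EPSILON\n", delta, delta / DBL_EPSILON );
    return delta < 3e-14 ? 0 : 1;
  }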
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07995 [7.9946447903217255E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 3.5120s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5415s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.9697s for 90112 events => throughput is 4.57E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [XSECTION] Cross section = 0.07997 [7.9971656827279650E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 3.7388s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9277s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8104s for 81920 events => throughput is 4.52E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9946447903217255E-002) and cpp (7.9946447903217255E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279650E-002) differ by less than 3E-14 (4.440892098500626e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.692831e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.641238e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.747820e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.663272e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -292,16 +292,16 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1056 [0.10562806079395723] fbridge_mode=1 - [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.4121s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3245s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0869s for 8192 events => throughput is 9.42E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [XSECTION] Cross section = 0.07847 [7.8471485809748595E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.4942s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4031s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0906s for 8192 events => throughput is 9.05E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10562806079395722) and cpp (0.10562806079395723) differ by less than 3E-14 (2.220446049250313e-16) +OK! 
xsec from fortran (7.8471485809748567E-002) and cpp (7.8471485809748595E-002) differ by less than 3E-14 (4.440892098500626e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -320,23 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07995 [7.9946447903217255E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 2.5004s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5305s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9692s for 90112 events => throughput is 9.30E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [XSECTION] Cross section = 0.07997 [7.9971656827279622E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 2.8146s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9105s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9035s for 81920 events => throughput is 9.07E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9946447903217255E-002) and cpp (7.9946447903217255E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279622E-002) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -345,12 +345,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.824726e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.167207e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.100533e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.243572e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -372,16 +372,16 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1056 [0.10562806079395723] fbridge_mode=1 - [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.4010s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3220s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0784s for 8192 events => throughput is 1.05E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [XSECTION] Cross section = 0.07847 [7.8471485809748595E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.4829s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4018s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0806s for 8192 events => throughput is 1.02E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10562806079395722) and cpp (0.10562806079395723) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471485809748595E-002) differ by less than 3E-14 (4.440892098500626e-16) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -400,23 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07995 [7.9946447903217255E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 2.4306s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5505s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8795s for 90112 events => throughput is 1.02E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [XSECTION] Cross section = 0.07997 [7.9971656827279622E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 2.7304s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9177s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8121s for 81920 events => throughput is 1.01E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (7.9946447903217255E-002) and cpp (7.9946447903217255E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279622E-002) differ by less than 3E-14 (2.220446049250313e-16) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.072680e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.039882e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.090132e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.052704e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -452,16 +452,16 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1056 [0.10562806079395724] fbridge_mode=1 - [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.4311s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3203s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1100s for 8192 events => throughput is 7.45E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [XSECTION] Cross section = 0.07847 [7.8471485809748581E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.5198s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4042s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1150s for 8192 events => throughput is 7.12E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10562806079395722) and cpp (0.10562806079395724) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471485809748581E-002) differ by less than 3E-14 (2.220446049250313e-16) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -480,23 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
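Taking the x1 ME throughputs of the five ggttg double-precision backends above side by side shows the SIMD scaling this log tracks: roughly 2x per doubling of vector width up to 512y, with 512z falling back below avx2 on this Xeon Silver 4216 host. A sketch that simply tabulates the values read off the runs above, with no re-measurement:

  #include <cstdio>

  int main()
  {
    // x1 ME throughputs (events/s) read off the ggttg d_inl0_hrd0 runs above
    const struct { const char* backend; double thr; } runs[] = {
      { "none", 2.46e4 }, { "sse4", 4.50e4 }, { "avx2", 9.05e4 },
      { "512y", 1.02e5 }, { "512z", 7.12e4 } };
    for ( const auto& r : runs )
      std::printf( "%-4s  %.2E events/s  (x%.1f vs none)\n",
                   r.backend, r.thr, r.thr / runs[0].thr );
    return 0;
  }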
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07995 [7.9946447903217255E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 2.7386s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5313s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2066s for 90112 events => throughput is 7.47E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [XSECTION] Cross section = 0.07997 [7.9971656827279622E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 3.0660s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9169s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1486s for 81920 events => throughput is 7.13E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9946447903217255E-002) and cpp (7.9946447903217255E-002) differ by less than 3E-14 (0.0) +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971656827279622E-002) differ by less than 3E-14 (2.220446049250313e-16) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.473786e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.941251e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.507446e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.958022e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -532,16 +532,16 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1056 [0.10562806079395722] fbridge_mode=1 - [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.7683s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7560s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0092s for 8192 events => throughput is 8.86E+05 events/s + [XSECTION] Cross section = 0.07847 [7.8471485809748553E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.8509s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8384s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0095s for 8192 events => throughput is 8.59E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0030s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10562806079395722) and cuda (0.10562806079395722) differ by less than 3E-14 (0.0) +OK! 
xsec from fortran (7.8471485809748567E-002) and cuda (7.8471485809748553E-002) differ by less than 3E-14 (2.220446049250313e-16) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -560,23 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07995 [7.9946447903217269E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 2.0054s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9750s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0272s for 90112 events => throughput is 3.31E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0031s + [XSECTION] Cross section = 0.07997 [7.9971656827279636E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 2.3784s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3496s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0258s for 81920 events => throughput is 3.18E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0030s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9946447903217255E-002) and cuda (7.9946447903217269E-002) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.9971656827279608E-002) and cuda (7.9971656827279636E-002) differ by less than 3E-14 (4.440892098500626e-16) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -585,42 +585,42 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.151435e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.100979e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.583032e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.403743e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.502762e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.183615e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.161500e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.166273e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.495653e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.191972e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.178328e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.170848e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.500095e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.176434e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.644291e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.659019e+06 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt index 61406c8f7b..985d934955 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-09-02_07:01:36 +DATE: 2024-09-15_12:27:32 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1056 [0.10562806079395722] fbridge_mode=0 + [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 [UNWEIGHT] Wrote 387 events (found 1591 events) - [COUNTERS] PROGRAM TOTAL : 0.6982s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3719s - [COUNTERS] Fortran MEs ( 1 ) : 0.3263s for 8192 events => throughput is 2.51E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7648s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4325s + [COUNTERS] Fortran MEs ( 1 ) : 0.3323s for 8192 events => throughput is 2.47E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1056 [0.10562806079395722] fbridge_mode=0 - [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.6424s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3173s - [COUNTERS] Fortran MEs ( 1 ) : 0.3251s for 8192 events => throughput is 2.52E+04 events/s + [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.7338s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4033s + [COUNTERS] Fortran MEs ( 1 ) : 0.3306s for 8192 events => throughput is 2.48E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07995 [7.9946447903217255E-002] fbridge_mode=0 - [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 5.0954s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5080s - [COUNTERS] Fortran MEs ( 1 ) : 3.5873s for 90112 events => throughput is 2.51E+04 events/s + [XSECTION] Cross section = 0.07997 [7.9971656827279608E-002] fbridge_mode=0 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 5.2246s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9116s + [COUNTERS] Fortran MEs ( 1 ) : 3.3130s for 81920 events => throughput is 2.47E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ 
-132,16 +132,16 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1056 [0.10562804416188390] fbridge_mode=1 - [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.6496s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3219s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3267s for 8192 events => throughput is 2.51E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s + [XSECTION] Cross section = 0.07847 [7.8471473453718410E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.7384s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4045s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3330s for 8192 events => throughput is 2.46E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10562806079395722) and cpp (0.10562804416188390) differ by less than 4E-4 (1.57458853178305e-07) +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471473453718410E-002) differ by less than 4E-4 (1.574588530672827e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,23 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07995 [7.9946434556369253E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 5.1623s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5506s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.6107s for 90112 events => throughput is 2.50E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s + [XSECTION] Cross section = 0.07997 [7.9971643267110940E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 5.2604s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9235s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.3360s for 81920 events => throughput is 2.46E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9946447903217255E-002) and cpp (7.9946434556369253E-002) differ by less than 4E-4 (1.6694735482936096e-07) +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971643267110940E-002) differ by less than 4E-4 (1.69562182517069e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -185,12 +185,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.574892e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.530952e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.576052e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.541703e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -212,16 +212,16 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1056 [0.10562802510294199] fbridge_mode=1 - [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.4218s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3212s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1001s for 8192 events => throughput is 8.19E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [XSECTION] Cross section = 0.07847 [7.8471459294758378E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.5085s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4061s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1020s for 8192 events => throughput is 8.03E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10562806079395722) and cpp (0.10562802510294199) differ by less than 4E-4 (3.37893311330717e-07) +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471459294758378E-002) differ by less than 4E-4 (3.37893311330717e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -240,23 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07995 [7.9946421150520988E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 2.6339s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5347s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0986s for 90112 events => throughput is 8.20E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 0.07997 [7.9971629726281482E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 2.9524s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9325s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0195s for 81920 events => throughput is 8.04E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (7.9946447903217255E-002) and cpp (7.9946421150520988E-002) differ by less than 4E-4 (3.3463270687761337e-07) +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971629726281482E-002) differ by less than 4E-4 (3.38882539141494e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.359747e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.224767e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.308905e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.232784e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -292,16 +292,16 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1056 [0.10562802567355024] fbridge_mode=1 - [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.3690s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3224s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0461s for 8192 events => throughput is 1.78E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 0.07847 [7.8471459718665412E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.4560s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4085s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0472s for 8192 events => throughput is 1.74E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10562806079395722) and cpp (0.10562802567355024) differ by less than 4E-4 (3.324912595248364e-07) +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471459718665412E-002) differ by less than 4E-4 (3.324912595248364e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -320,23 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07995 [7.9946420380865552E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 2.0358s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5340s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5014s for 90112 events => throughput is 1.80E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 0.07997 [7.9971629259822388E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 2.4007s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9344s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4660s for 81920 events => throughput is 1.76E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9946447903217255E-002) and cpp (7.9946420380865552E-002) differ by less than 4E-4 (3.4425984429820033e-07) +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971629259822388E-002) differ by less than 4E-4 (3.447153443802975e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.815131e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.788351e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.806385e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.804243e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -372,16 +372,16 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1056 [0.10562802567355024] fbridge_mode=1 - [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.3708s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3279s + [XSECTION] Cross section = 0.07847 [7.8471459718665412E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.4502s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4074s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0424s for 8192 events => throughput is 1.93E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10562806079395722) and cpp (0.10562802567355024) differ by less than 4E-4 (3.324912595248364e-07) +OK! 
xsec from fortran (7.8471485809748567E-002) and cpp (7.8471459718665412E-002) differ by less than 4E-4 (3.324912595248364e-07) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -400,23 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07995 [7.9946420380865552E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 1.9961s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5362s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4595s for 90112 events => throughput is 1.96E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 0.07997 [7.9971629259822388E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 2.3548s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9286s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4259s for 81920 events => throughput is 1.92E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9946447903217255E-002) and cpp (7.9946420380865552E-002) differ by less than 4E-4 (3.4425984429820033e-07) +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971629259822388E-002) differ by less than 4E-4 (3.447153443802975e-07) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -425,12 +425,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.027880e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.953582e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.996384e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.958777e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -452,16 +452,16 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1056 [0.10562804211436801] fbridge_mode=1 - [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.3782s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3228s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0548s for 8192 events => throughput is 1.49E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [XSECTION] Cross section = 0.07847 [7.8471471932611128E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.4685s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4128s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0553s for 8192 events => throughput is 1.48E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10562806079395722) and cpp (0.10562804211436801) differ by less than 4E-4 (1.768430573090285e-07) +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471471932611128E-002) differ by less than 4E-4 (1.768430569759616e-07) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -480,23 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07995 [7.9946430964077192E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 2.1472s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5441s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6025s for 90112 events => throughput is 1.50E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [XSECTION] Cross section = 0.07997 [7.9971639934306102E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 2.4956s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9320s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5632s for 81920 events => throughput is 1.45E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (7.9946447903217255E-002) and cpp (7.9946430964077192E-002) differ by less than 4E-4 (2.1188108423331187e-07) +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971639934306102E-002) differ by less than 4E-4 (2.1123700788550082e-07) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.510181e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.487964e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.524564e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.471980e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -532,16 +532,16 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1056 [0.10562804625987131] fbridge_mode=1 - [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.7628s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7581s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0034s for 8192 events => throughput is 2.39E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s + [XSECTION] Cross section = 0.07847 [7.8471475012321185E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.8512s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8465s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0036s for 8192 events => throughput is 2.26E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10562806079395722) and cuda (0.10562804625987131) differ by less than 4E-4 (1.375968260441951e-07) +OK! xsec from fortran (7.8471485809748567E-002) and cuda (7.8471475012321185E-002) differ by less than 4E-4 (1.375968260441951e-07) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -560,23 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07995 [7.9946440545672862E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 1.9791s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9646s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0133s for 90112 events => throughput is 6.79E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s + [XSECTION] Cross section = 0.07997 [7.9971648932322295E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 2.3665s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3527s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0128s for 81920 events => throughput is 6.42E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9946447903217255E-002) and cuda (7.9946440545672862E-002) differ by less than 4E-4 (9.203091055898227e-08) +OK! xsec from fortran (7.9971656827279608E-002) and cuda (7.9971648932322295E-002) differ by less than 4E-4 (9.872194262072753e-08) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.796780e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.643032e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.966060e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.828890e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.318273e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.215885e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.228359e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.209299e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.340547e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.230792e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.291829e+07 ) 
sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.279914e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.222050e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.122919e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.261962e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.244670e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index d4a16dbe06..f09398c494 100644 --- a/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg - +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' OMP_NUM_THREADS= -DATE: 2024-09-02_07:02:13 +DATE: 2024-09-15_12:28:12 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1056 [0.10562806079395722] fbridge_mode=0 + [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 [UNWEIGHT] Wrote 387 events (found 1591 events) - [COUNTERS] PROGRAM TOTAL : 0.6895s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3623s - [COUNTERS] Fortran MEs ( 1 ) : 0.3271s for 8192 events => throughput is 2.50E+04 events/s + [COUNTERS] PROGRAM TOTAL : 0.7588s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4272s + [COUNTERS] Fortran MEs ( 1 ) : 0.3316s for 8192 events => throughput is 2.47E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttg_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1056 [0.10562806079395722] fbridge_mode=0 - [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.6436s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3209s - [COUNTERS] Fortran MEs ( 1 ) : 0.3227s for 8192 events => throughput is 2.54E+04 events/s + [XSECTION] Cross section = 0.07847 [7.8471485809748567E-002] fbridge_mode=0 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.7376s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4040s + [COUNTERS] Fortran MEs ( 1 ) : 0.3336s for 8192 events => throughput is 2.46E+04 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < 
/tmp/avalassi/input_ggttg_x10_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07995 [7.9946447903217255E-002] fbridge_mode=0 - [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 5.1083s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5165s - [COUNTERS] Fortran MEs ( 1 ) : 3.5918s for 90112 events => throughput is 2.51E+04 events/s + [XSECTION] Cross section = 0.07997 [7.9971656827279608E-002] fbridge_mode=0 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 5.2380s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9161s + [COUNTERS] Fortran MEs ( 1 ) : 3.3219s for 81920 events => throughput is 2.47E+04 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,16 +132,16 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1056 [0.10562806184450918] fbridge_mode=1 - [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.6665s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3231s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3421s for 8192 events => throughput is 2.39E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s + [XSECTION] Cross section = 0.07847 [7.8471486590207584E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.7582s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4046s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3525s for 8192 events => throughput is 2.32E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10562806079395722) and cpp (0.10562806184450918) differ by less than 2E-4 (9.945765988561561e-09) +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471486590207584E-002) differ by less than 2E-4 (9.945765766516956e-09) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,23 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07995 [7.9946448664873659E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 5.3338s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5463s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.7862s for 90112 events => throughput is 2.38E+04 events/s + [XSECTION] Cross section = 0.07997 [7.9971657589635384E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 5.4849s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9415s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.5423s for 81920 events => throughput is 2.31E+04 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (7.9946447903217255E-002) and cpp (7.9946448664873659E-002) differ by less than 2E-4 (9.527082456273206e-09) +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657589635384E-002) differ by less than 2E-4 (9.532824529756567e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.459745e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.417569e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.468576e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.399198e+04 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -212,16 +212,16 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1056 [0.10562806177750511] fbridge_mode=1 - [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.5005s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3230s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1768s for 8192 events => throughput is 4.63E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [XSECTION] Cross section = 0.07847 [7.8471486540430027E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.5837s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4040s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1790s for 8192 events => throughput is 4.58E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10562806079395722) and cpp (0.10562806177750511) differ by less than 2E-4 (9.311426296676473e-09) +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471486540430027E-002) differ by less than 2E-4 (9.311426296676473e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -240,23 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07995 [7.9946448673477319E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 3.5369s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5635s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.9727s for 90112 events => throughput is 4.57E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [XSECTION] Cross section = 0.07997 [7.9971657589963913E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 3.7197s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9173s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.8018s for 81920 events => throughput is 4.55E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9946447903217255E-002) and cpp (7.9946448673477319E-002) differ by less than 2E-4 (9.634700370853011e-09) +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657589963913E-002) differ by less than 2E-4 (9.536932576992285e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.701428e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.658424e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.794307e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.679651e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -292,16 +292,16 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1056 [0.10562806158303427] fbridge_mode=1 - [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.4128s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3237s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0884s for 8192 events => throughput is 9.27E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [XSECTION] Cross section = 0.07847 [7.8471486395956899E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.4953s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4035s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0912s for 8192 events => throughput is 8.98E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10562806079395722) and cpp (0.10562806158303427) differ by less than 2E-4 (7.470335461334798e-09) +OK! 
xsec from fortran (7.8471485809748567E-002) and cpp (7.8471486395956899E-002) differ by less than 2E-4 (7.470335683379403e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -320,23 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07995 [7.9946448515023347E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 2.4996s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5337s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9653s for 90112 events => throughput is 9.34E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [XSECTION] Cross section = 0.07997 [7.9971657432811344E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 2.8060s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9080s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8973s for 81920 events => throughput is 9.13E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9946447903217255E-002) and cpp (7.9946448515023347E-002) differ by less than 2E-4 (7.652698919002887e-09) +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657432811344E-002) differ by less than 2E-4 (7.571829385710771e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -345,12 +345,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.510598e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.369001e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.411016e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.292033e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -372,16 +372,16 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1056 [0.10562806158303427] fbridge_mode=1 - [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.3985s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3199s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0780s for 8192 events => throughput is 1.05E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [XSECTION] Cross section = 0.07847 [7.8471486395956899E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.4845s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4048s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0792s for 8192 events => throughput is 1.03E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10562806079395722) and cpp (0.10562806158303427) differ by less than 2E-4 (7.470335461334798e-09) +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471486395956899E-002) differ by less than 2E-4 (7.470335683379403e-09) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -400,23 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07995 [7.9946448515023347E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 2.3996s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5420s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8570s for 90112 events => throughput is 1.05E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [XSECTION] Cross section = 0.07997 [7.9971657432811344E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 2.7172s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9239s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7928s for 81920 events => throughput is 1.03E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (7.9946447903217255E-002) and cpp (7.9946448515023347E-002) differ by less than 2E-4 (7.652698919002887e-09) +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657432811344E-002) differ by less than 2E-4 (7.571829385710771e-09) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.081438e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.064357e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.093693e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.063711e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -452,16 +452,16 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1056 [0.10562806177389659] fbridge_mode=1 - [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.4322s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3195s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1119s for 8192 events => throughput is 7.32E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [XSECTION] Cross section = 0.07847 [7.8471486537749241E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.5230s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4057s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1167s for 8192 events => throughput is 7.02E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10562806079395722) and cpp (0.10562806177389659) differ by less than 2E-4 (9.277263846030337e-09) +OK! xsec from fortran (7.8471485809748567E-002) and cpp (7.8471486537749241E-002) differ by less than 2E-4 (9.277263846030337e-09) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -480,23 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07995 [7.9946448645092413E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 2.7734s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5330s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2396s for 90112 events => throughput is 7.27E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [XSECTION] Cross section = 0.07997 [7.9971657565670345E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 3.1033s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9248s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1778s for 81920 events => throughput is 6.96E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9946447903217255E-002) and cpp (7.9946448645092413E-002) differ by less than 2E-4 (9.279651269622491e-09) +OK! xsec from fortran (7.9971656827279608E-002) and cpp (7.9971657565670345E-002) differ by less than 2E-4 (9.233155351395794e-09) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.281646e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.025965e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.341397e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.997215e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -532,16 +532,16 @@ DEBUG: MEK processed 8192 events across 16 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.1056 [0.10562806076929508] fbridge_mode=1 - [UNWEIGHT] Wrote 391 events (found 1147 events) - [COUNTERS] PROGRAM TOTAL : 0.7648s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7525s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0093s for 8192 events => throughput is 8.85E+05 events/s + [XSECTION] Cross section = 0.07847 [7.8471485791426987E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 376 events (found 1358 events) + [COUNTERS] PROGRAM TOTAL : 0.8520s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8395s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0096s for 8192 events => throughput is 8.58E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0030s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.10562806079395722) and cuda (0.10562806076929508) differ by less than 2E-4 (2.334809012793926e-10) +OK! 
xsec from fortran (7.8471485809748567E-002) and cuda (7.8471485791426987E-002) differ by less than 2E-4 (2.334807902570901e-10) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -560,23 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttg_x10_cudacpp > /tmp/avalassi/output_ggttg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 16 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 16 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 32/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.07995 [7.9946447910357057E-002] fbridge_mode=1 - [UNWEIGHT] Wrote 1853 events (found 1858 events) - [COUNTERS] PROGRAM TOTAL : 1.9880s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9577s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0273s for 90112 events => throughput is 3.30E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0031s + [XSECTION] Cross section = 0.07997 [7.9971656830583548E-002] fbridge_mode=1 + [UNWEIGHT] Wrote 2267 events (found 2272 events) + [COUNTERS] PROGRAM TOTAL : 2.3869s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3578s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0261s for 81920 events => throughput is 3.14E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0030s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.9946447903217255E-002) and cuda (7.9946447910357057E-002) differ by less than 2E-4 (8.930722827926729e-11) +OK! xsec from fortran (7.9971656827279608E-002) and cuda (7.9971656830583548E-002) differ by less than 2E-4 (4.131384123695625e-11) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -585,42 +585,42 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.146368e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.075089e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.559076e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.284373e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.447733e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.113955e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.157373e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.151597e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.443954e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.103058e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.168889e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.161281e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.468540e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.097615e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.627191e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.632131e+06 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index e1c25239e7..aa79e74132 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg - +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-09-02_07:02:55 +DATE: 2024-09-15_12:28:56 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.7806 [0.78063416981514677] fbridge_mode=0 + [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 223 events) - [COUNTERS] PROGRAM TOTAL : 4.4653s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2554s - [COUNTERS] Fortran MEs ( 1 ) : 4.2100s for 8192 events => throughput is 1.95E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.6228s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3217s + [COUNTERS] Fortran MEs ( 1 ) : 4.3011s for 8192 events => throughput is 1.90E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.7806 [0.78063416981514677] fbridge_mode=0 - [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 4.4569s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2471s - [COUNTERS] Fortran MEs ( 1 ) : 4.2098s for 8192 events => throughput is 1.95E+03 events/s + [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 4.6030s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3119s + [COUNTERS] Fortran MEs ( 1 ) : 4.2911s for 8192 events => throughput is 1.91E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.1915 [0.19154487411607687] fbridge_mode=0 - [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 48.3066s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8115s - [COUNTERS] Fortran MEs ( 1 ) : 46.4950s for 90112 events => throughput is 1.94E+03 events/s + [XSECTION] Cross section = 0.2093 [0.20930257969248323] fbridge_mode=0 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 45.1984s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1366s + [COUNTERS] Fortran MEs ( 1 ) : 43.0618s for 81920 events => throughput is 1.90E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,16 
+132,16 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.7806 [0.78063416981514666] fbridge_mode=1 - [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 4.6394s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2529s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.3779s for 8192 events => throughput is 1.87E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0087s + [XSECTION] Cross section = 0.3314 [0.33144786561240192] fbridge_mode=1 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 4.7927s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3128s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.4709s for 8192 events => throughput is 1.83E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0091s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.78063416981514677) and cpp (0.78063416981514666) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786561240192) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,23 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } +DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.1915 [0.19154487411607679] fbridge_mode=1 - [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 50.0665s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8392s - [COUNTERS] CudaCpp MEs ( 2 ) : 48.2181s for 90112 events => throughput is 1.87E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0092s + [XSECTION] Cross section = 0.2093 [0.20930257969248320] fbridge_mode=1 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 46.9448s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1256s + [COUNTERS] CudaCpp MEs ( 2 ) : 44.8103s for 81920 events => throughput is 1.83E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0089s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.19154487411607687) and cpp (0.19154487411607679) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930257969248320) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -185,12 +185,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.929856e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.891751e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.936527e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.888267e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -212,16 +212,16 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.7806 [0.78063416981514688] fbridge_mode=1 - [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 2.5862s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2518s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.3295s for 8192 events => throughput is 3.52E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0049s + [XSECTION] Cross section = 0.3314 [0.33144786561240192] fbridge_mode=1 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 2.6844s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3118s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.3678s for 8192 events => throughput is 3.46E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0048s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.78063416981514677) and cpp (0.78063416981514688) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786561240192) differ by less than 3E-14 (2.220446049250313e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -240,23 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } +DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.1915 [0.19154487411607687] fbridge_mode=1 - [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 27.5773s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8641s - [COUNTERS] CudaCpp MEs ( 2 ) : 25.7084s for 90112 events => throughput is 3.51E+03 events/s + [XSECTION] Cross section = 0.2093 [0.20930257969248325] fbridge_mode=1 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 25.9231s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1299s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.7883s for 81920 events => throughput is 3.44E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0048s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.19154487411607687) and cpp (0.19154487411607687) differ by less than 3E-14 (0.0) +OK! 
xsec from fortran (0.20930257969248323) and cpp (0.20930257969248325) differ by less than 3E-14 (2.220446049250313e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.637176e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.557169e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.613295e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.534554e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -292,16 +292,16 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.7806 [0.78063416981514699] fbridge_mode=1 - [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 1.2704s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2536s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0143s for 8192 events => throughput is 8.08E+03 events/s + [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=1 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 1.3455s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3111s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0319s for 8192 events => throughput is 7.94E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.78063416981514677) and cpp (0.78063416981514699) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786561240197) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -320,23 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } +DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.1915 [0.19154487411607679] fbridge_mode=1 - [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 13.1377s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8650s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.2701s for 90112 events => throughput is 8.00E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0026s + [XSECTION] Cross section = 0.2093 [0.20930257969248320] fbridge_mode=1 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 12.5238s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1221s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.3993s for 81920 events => throughput is 7.88E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0024s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.19154487411607687) and cpp (0.19154487411607679) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930257969248320) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.206026e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.957412e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.331692e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.987255e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -372,16 +372,16 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.7806 [0.78063416981514699] fbridge_mode=1 - [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 1.1622s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2533s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.9066s for 8192 events => throughput is 9.04E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0023s + [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=1 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 1.2470s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3177s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9271s for 8192 events => throughput is 8.84E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0022s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.78063416981514677) and cpp (0.78063416981514699) differ by less than 3E-14 (2.220446049250313e-16) +OK! 
xsec from fortran (0.33144786561240197) and cpp (0.33144786561240197) differ by less than 3E-14 (0.0) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -400,23 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } +DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.1915 [0.19154487411607679] fbridge_mode=1 - [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 11.7181s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8398s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.8761s for 90112 events => throughput is 9.12E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0022s + [XSECTION] Cross section = 0.2093 [0.20930257969248320] fbridge_mode=1 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 11.3142s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1103s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.2017s for 81920 events => throughput is 8.90E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0021s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.19154487411607687) and cpp (0.19154487411607679) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930257969248320) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -425,12 +425,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.233002e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.281561e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.450469e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.268453e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -452,16 +452,16 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.7806 [0.78063416981514699] fbridge_mode=1 - [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 1.3833s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2521s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1284s for 8192 events => throughput is 7.26E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0029s + [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=1 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 1.4928s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3122s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1778s for 8192 events => throughput is 6.96E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0028s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.78063416981514677) and cpp (0.78063416981514699) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786561240197) differ by less than 3E-14 (0.0) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -480,23 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } +DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.1915 [0.19154487411607679] fbridge_mode=1 - [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 14.3179s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8300s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.4849s for 90112 events => throughput is 7.22E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0030s + [XSECTION] Cross section = 0.2093 [0.20930257969248320] fbridge_mode=1 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 13.9855s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1285s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.8543s for 81920 events => throughput is 6.91E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (0.19154487411607687) and cpp (0.19154487411607679) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930257969248320) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.196841e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.050033e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.350514e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.018224e+03 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -532,16 +532,16 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.7806 [0.78063416981514688] fbridge_mode=1 - [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 0.7641s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6905s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0387s for 8192 events => throughput is 2.12E+05 events/s + [XSECTION] Cross section = 0.3314 [0.33144786561240192] fbridge_mode=1 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 0.8261s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7522s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0390s for 8192 events => throughput is 2.10E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0349s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.78063416981514677) and cuda (0.78063416981514688) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.33144786561240197) and cuda (0.33144786561240192) differ by less than 3E-14 (2.220446049250313e-16) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -560,23 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } +DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.1915 [0.19154487411607690] fbridge_mode=1 - [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 2.6595s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2552s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3695s for 90112 events => throughput is 2.44E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0347s + [XSECTION] Cross section = 0.2093 [0.20930257969248336] fbridge_mode=1 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 2.9166s + [COUNTERS] Fortran Overhead ( 0 ) : 2.5498s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3320s for 81920 events => throughput is 2.47E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0348s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.19154487411607687) and cuda (0.19154487411607690) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.20930257969248323) and cuda (0.20930257969248336) differ by less than 3E-14 (6.661338147750939e-16) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.140664e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.134189e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.354973e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.335722e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.118962e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.124137e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.168678e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.168234e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.128635e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.119614e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.173072e+05 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 4.173416e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.126252e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.119318e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.428223e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.423891e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index a94ddeb453..4deca2cf02 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' + make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-09-02_07:06:53 +DATE: 2024-09-15_12:32:48 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.7806 [0.78063416981514677] fbridge_mode=0 + [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 223 events) - [COUNTERS] PROGRAM TOTAL : 4.4597s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2531s - [COUNTERS] Fortran MEs ( 1 ) : 4.2066s for 8192 events => throughput is 1.95E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.6182s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3167s + [COUNTERS] Fortran MEs ( 1 ) : 4.3014s for 8192 events => throughput is 1.90E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.7806 [0.78063416981514677] fbridge_mode=0 - [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 4.4703s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2495s - [COUNTERS] Fortran MEs ( 1 ) : 4.2209s for 8192 events => throughput is 1.94E+03 events/s + [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 4.6122s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3142s + [COUNTERS] Fortran MEs ( 1 ) : 4.2980s for 8192 events => throughput is 1.91E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' 
./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.1915 [0.19154487411607687] fbridge_mode=0 - [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 48.2714s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8151s - [COUNTERS] Fortran MEs ( 1 ) : 46.4563s for 90112 events => throughput is 1.94E+03 events/s + [XSECTION] Cross section = 0.2093 [0.20930257969248323] fbridge_mode=0 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 45.2847s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1346s + [COUNTERS] Fortran MEs ( 1 ) : 43.1501s for 81920 events => throughput is 1.90E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,16 +132,16 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.7806 [0.78063782001975612] fbridge_mode=1 - [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 4.5088s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2538s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.2465s for 8192 events => throughput is 1.93E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0085s + [XSECTION] Cross section = 0.3314 [0.33144941544531159] fbridge_mode=1 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 4.6745s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3147s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.3511s for 8192 events => throughput is 1.88E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0087s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.78063416981514677) and cpp (0.78063782001975612) differ by less than 4E-4 (4.67594777497915e-06) +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144941544531159) differ by less than 4E-4 (4.675947774535061e-06) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -161,23 +161,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } +DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.1915 [0.19154547896253576] fbridge_mode=1 - [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 48.6783s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8455s - [COUNTERS] CudaCpp MEs ( 2 ) : 46.8243s for 90112 events => throughput is 1.92E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0084s + [XSECTION] Cross section = 0.2093 [0.20930329135137288] fbridge_mode=1 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 45.6898s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1650s + [COUNTERS] CudaCpp MEs ( 2 ) : 43.5163s for 81920 events => throughput is 1.88E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0086s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (0.19154487411607687) and cpp (0.19154547896253576) differ by less than 4E-4 (3.15772719927665e-06) +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930329135137288) differ by less than 4E-4 (3.400143900211816e-06) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -186,12 +186,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.984164e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.944338e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.972429e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.941508e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -213,16 +213,16 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.7806 [0.78063772189507497] fbridge_mode=1 - [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 1.4262s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2504s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1731s for 8192 events => throughput is 6.98E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s + [XSECTION] Cross section = 0.3314 [0.33144937378275385] fbridge_mode=1 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 1.4963s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3103s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1836s for 8192 events => throughput is 6.92E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0024s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.78063416981514677) and cpp (0.78063772189507497) differ by less than 4E-4 (4.55024909951085e-06) +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144937378275385) differ by less than 4E-4 (4.550249099066761e-06) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -241,23 +241,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } +DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.1915 [0.19154543829232032] fbridge_mode=1 - [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 14.7697s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8367s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.9305s for 90112 events => throughput is 6.97E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s + [XSECTION] Cross section = 0.2093 [0.20930324959819654] fbridge_mode=1 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 14.1993s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1207s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.0762s for 81920 events => throughput is 6.78E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0024s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.19154487411607687) and cpp (0.19154543829232032) differ by less than 4E-4 (2.9453998497963596e-06) +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930324959819654) differ by less than 4E-4 (3.2006567445286294e-06) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -266,12 +266,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.148962e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.996317e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.178836e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.839536e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -293,16 +293,16 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.7806 [0.78063776840958710] fbridge_mode=1 - [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 0.7622s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2497s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5111s for 8192 events => throughput is 1.60E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s + [XSECTION] Cross section = 0.3314 [0.33144939353225550] fbridge_mode=1 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 0.8442s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3095s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5334s for 8192 events => throughput is 1.54E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0013s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.78063416981514677) and cpp (0.78063776840958710) differ by less than 4E-4 (4.609834644009325e-06) +OK! 
xsec from fortran (0.33144786561240197) and cpp (0.33144939353225550) differ by less than 4E-4 (4.609834643787281e-06) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -321,23 +321,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } +DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.1915 [0.19154546188078725] fbridge_mode=1 - [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 7.4857s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8369s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.6474s for 90112 events => throughput is 1.60E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s + [XSECTION] Cross section = 0.2093 [0.20930327551379133] fbridge_mode=1 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 7.3660s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1115s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.2532s for 81920 events => throughput is 1.56E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0013s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.19154487411607687) and cpp (0.19154546188078725) differ by less than 4E-4 (3.0685483654036005e-06) +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930327551379133) differ by less than 4E-4 (3.3244755468508913e-06) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -346,12 +346,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.642140e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.604925e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.645647e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.604030e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -373,16 +373,16 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.7806 [0.78063776840958710] fbridge_mode=1 - [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 0.7061s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2524s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4523s for 8192 events => throughput is 1.81E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s + [XSECTION] Cross section = 0.3314 [0.33144939353225550] fbridge_mode=1 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 0.7886s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3135s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4739s for 8192 events => throughput is 1.73E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.78063416981514677) and cpp (0.78063776840958710) differ by less than 4E-4 (4.609834644009325e-06) +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144939353225550) differ by less than 4E-4 (4.609834643787281e-06) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -401,23 +401,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } +DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.1915 [0.19154546188078725] fbridge_mode=1 - [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 6.8659s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8249s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.0396s for 90112 events => throughput is 1.79E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s + [XSECTION] Cross section = 0.2093 [0.20930327551379133] fbridge_mode=1 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 6.8107s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1081s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.7015s for 81920 events => throughput is 1.74E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (0.19154487411607687) and cpp (0.19154546188078725) differ by less than 4E-4 (3.0685483654036005e-06) +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930327551379133) differ by less than 4E-4 (3.3244755468508913e-06) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -426,12 +426,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.858052e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.805124e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.875566e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.796242e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -453,16 +453,16 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.7806 [0.78063796149473241] fbridge_mode=1 - [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 0.8115s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2523s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5576s for 8192 events => throughput is 1.47E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s + [XSECTION] Cross section = 0.3314 [0.33144947551388249] fbridge_mode=1 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 0.8988s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3122s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5852s for 8192 events => throughput is 1.40E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.78063416981514677) and cpp (0.78063796149473241) differ by less than 4E-4 (4.857178601991308e-06) +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144947551388249) differ by less than 4E-4 (4.857178601991308e-06) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -481,23 +481,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } +DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.1915 [0.19154549699006573] fbridge_mode=1 - [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 8.1202s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8433s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.2752s for 90112 events => throughput is 1.44E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s + [XSECTION] Cross section = 0.2093 [0.20930331717025510] fbridge_mode=1 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 8.0322s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1227s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.9080s for 81920 events => throughput is 1.39E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.19154487411607687) and cpp (0.19154549699006573) differ by less than 4E-4 (3.251843682772204e-06) +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930331717025510) differ by less than 4E-4 (3.523500632152121e-06) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -506,12 +506,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.477080e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.400554e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.470763e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.422518e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -533,16 +533,16 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.7806 [0.78063814953416677] fbridge_mode=1 - [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 0.7381s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6865s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0270s for 8192 events => throughput is 3.04E+05 events/s + [XSECTION] Cross section = 0.3314 [0.33144955535316123] fbridge_mode=1 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 0.7991s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7473s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0272s for 8192 events => throughput is 3.01E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0246s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.78063416981514677) and cuda (0.78063814953416677) differ by less than 4E-4 (5.098058954988716e-06) +OK! 
xsec from fortran (0.33144786561240197) and cuda (0.33144955535316123) differ by less than 4E-4 (5.0980589545446264e-06) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -561,23 +561,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } +DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.1915 [0.19154554638015539] fbridge_mode=1 - [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 2.5327s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2524s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2557s for 90112 events => throughput is 3.52E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0246s + [XSECTION] Cross section = 0.2093 [0.20930336562619947] fbridge_mode=1 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 2.7864s + [COUNTERS] Fortran Overhead ( 0 ) : 2.5289s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2329s for 81920 events => throughput is 3.52E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0245s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.19154487411607687) and cuda (0.19154554638015539) differ by less than 4E-4 (3.5096949559942914e-06) +OK! xsec from fortran (0.20930257969248323) and cuda (0.20930336562619947) differ by less than 4E-4 (3.755012085271403e-06) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -586,42 +586,42 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.116091e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.087446e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.389662e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.366930e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.096626e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.074769e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.211336e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.213607e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.102050e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.112109e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.224185e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.224381e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.086520e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.063345e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.391380e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.392863e+05 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt index a2933249ce..abdc3e6985 100644 --- a/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg - +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' OMP_NUM_THREADS= -DATE: 2024-09-02_07:10:03 +DATE: 2024-09-15_12:35:52 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.7806 [0.78063416981514677] fbridge_mode=0 + [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 [UNWEIGHT] Wrote 7 events (found 223 events) - [COUNTERS] PROGRAM TOTAL : 4.4699s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2529s - [COUNTERS] Fortran MEs ( 1 ) : 4.2170s for 8192 events => throughput is 1.94E+03 events/s + [COUNTERS] PROGRAM TOTAL : 4.6491s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3163s + [COUNTERS] Fortran MEs ( 1 ) : 4.3328s for 8192 events => throughput is 1.89E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x1_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.7806 [0.78063416981514677] fbridge_mode=0 - [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 4.4662s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2467s - [COUNTERS] Fortran MEs ( 1 ) : 4.2195s for 8192 events => throughput is 1.94E+03 events/s + [XSECTION] Cross section = 0.3314 [0.33144786561240197] fbridge_mode=0 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 4.6308s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3143s + [COUNTERS] Fortran MEs ( 1 ) : 4.3165s for 8192 events => throughput is 1.90E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttgg_x10_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.1915 [0.19154487411607687] fbridge_mode=0 - [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 48.1948s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8028s - [COUNTERS] Fortran MEs ( 1 ) : 46.3919s for 90112 events => throughput is 1.94E+03 events/s + [XSECTION] Cross section = 0.2093 [0.20930257969248323] fbridge_mode=0 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 45.2197s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1385s + [COUNTERS] Fortran MEs ( 1 ) : 43.0812s for 81920 events => throughput is 1.90E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,16 
+132,16 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.7806 [0.78063417389679768] fbridge_mode=1 - [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 4.6823s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2633s - [COUNTERS] CudaCpp MEs ( 2 ) : 4.4100s for 8192 events => throughput is 1.86E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0090s + [XSECTION] Cross section = 0.3314 [0.33144786734542164] fbridge_mode=1 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 4.8483s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3147s + [COUNTERS] CudaCpp MEs ( 2 ) : 4.5249s for 8192 events => throughput is 1.81E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0088s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.78063416981514677) and cpp (0.78063417389679768) differ by less than 2E-4 (5.2286346363672465e-09) +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786734542164) differ by less than 2E-4 (5.228634192278037e-09) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,23 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } +DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.1915 [0.19154487480444804] fbridge_mode=1 - [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 50.6321s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8415s - [COUNTERS] CudaCpp MEs ( 2 ) : 48.7818s for 90112 events => throughput is 1.85E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0088s + [XSECTION] Cross section = 0.2093 [0.20930258048084049] fbridge_mode=1 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 47.5178s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1281s + [COUNTERS] CudaCpp MEs ( 2 ) : 45.3807s for 81920 events => throughput is 1.81E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0090s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.19154487411607687) and cpp (0.19154487480444804) differ by less than 2E-4 (3.593785269373484e-09) +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930258048084049) differ by less than 2E-4 (3.766591261111785e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -185,12 +185,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.912608e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.872546e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.910493e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.871624e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -212,16 +212,16 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.7806 [0.78063417194462525] fbridge_mode=1 - [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 2.5988s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2528s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.3410s for 8192 events => throughput is 3.50E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0050s + [XSECTION] Cross section = 0.3314 [0.33144786651655289] fbridge_mode=1 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 2.7061s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3125s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.3887s for 8192 events => throughput is 3.43E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0049s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.78063416981514677) and cpp (0.78063417194462525) differ by less than 2E-4 (2.7278828085286477e-09) +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786651655289) differ by less than 2E-4 (2.7278828085286477e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -240,23 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } +DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.1915 [0.19154487462148848] fbridge_mode=1 - [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 27.5594s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8414s - [COUNTERS] CudaCpp MEs ( 2 ) : 25.7133s for 90112 events => throughput is 3.50E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0047s + [XSECTION] Cross section = 0.2093 [0.20930258019984904] fbridge_mode=1 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 26.0597s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1243s + [COUNTERS] CudaCpp MEs ( 2 ) : 23.9304s for 81920 events => throughput is 3.42E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0050s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (0.19154487411607687) and cpp (0.19154487462148848) differ by less than 2E-4 (2.63860666649407e-09) +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930258019984904) differ by less than 2E-4 (2.424078271445751e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.589262e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.500917e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.503515e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.521414e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -292,16 +292,16 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.7806 [0.78063417138500590] fbridge_mode=1 - [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 1.2772s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2553s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.0194s for 8192 events => throughput is 8.04E+03 events/s + [XSECTION] Cross section = 0.3314 [0.33144786627894518] fbridge_mode=1 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 1.3556s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3120s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.0411s for 8192 events => throughput is 7.87E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0025s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.78063416981514677) and cpp (0.78063417138500590) differ by less than 2E-4 (2.0110049181454315e-09) +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786627894518) differ by less than 2E-4 (2.0110046961008265e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -320,23 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } +DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.1915 [0.19154487463123707] fbridge_mode=1 - [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 13.0793s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8712s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.2057s for 90112 events => throughput is 8.04E+03 events/s + [XSECTION] Cross section = 0.2093 [0.20930258019863174] fbridge_mode=1 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 12.5367s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1235s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.4107s for 81920 events => throughput is 7.87E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0024s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.19154487411607687) and cpp (0.19154487463123707) differ by less than 2E-4 (2.6895012883443314e-09) +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930258019863174) differ by less than 2E-4 (2.4182622571089496e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.252678e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.110548e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.263765e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.032234e+03 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -372,16 +372,16 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.7806 [0.78063417138500590] fbridge_mode=1 - [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 1.1414s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2534s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8858s for 8192 events => throughput is 9.25E+03 events/s + [XSECTION] Cross section = 0.3314 [0.33144786627894518] fbridge_mode=1 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 1.2161s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3129s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.9010s for 8192 events => throughput is 9.09E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0022s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.78063416981514677) and cpp (0.78063417138500590) differ by less than 2E-4 (2.0110049181454315e-09) +OK! 
xsec from fortran (0.33144786561240197) and cpp (0.33144786627894518) differ by less than 2E-4 (2.0110046961008265e-09) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -400,23 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } +DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.1915 [0.19154487463123707] fbridge_mode=1 - [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 11.5724s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8374s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.7328s for 90112 events => throughput is 9.26E+03 events/s + [XSECTION] Cross section = 0.2093 [0.20930258019863174] fbridge_mode=1 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 11.1682s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1205s + [COUNTERS] CudaCpp MEs ( 2 ) : 9.0456s for 81920 events => throughput is 9.06E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0022s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.19154487411607687) and cpp (0.19154487463123707) differ by less than 2E-4 (2.6895012883443314e-09) +OK! xsec from fortran (0.20930257969248323) and cpp (0.20930258019863174) differ by less than 2E-4 (2.4182622571089496e-09) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -425,12 +425,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.552325e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.335833e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.532682e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.254922e+03 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -452,16 +452,16 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.7806 [0.78063417138500590] fbridge_mode=1 - [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 1.3940s - [COUNTERS] Fortran Overhead ( 0 ) : 0.2528s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1383s for 8192 events => throughput is 7.20E+03 events/s + [XSECTION] Cross section = 0.3314 [0.33144786627894518] fbridge_mode=1 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 1.5079s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3120s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1930s for 8192 events => throughput is 6.87E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0028s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.78063416981514677) and cpp (0.78063417138500590) differ by less than 2E-4 (2.0110049181454315e-09) +OK! xsec from fortran (0.33144786561240197) and cpp (0.33144786627894518) differ by less than 2E-4 (2.0110046961008265e-09) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -480,23 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } +DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.1915 [0.19154487463123707] fbridge_mode=1 - [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 14.3865s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8402s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.5435s for 90112 events => throughput is 7.18E+03 events/s + [XSECTION] Cross section = 0.2093 [0.20930258019863174] fbridge_mode=1 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 14.0414s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1292s + [COUNTERS] CudaCpp MEs ( 2 ) : 11.9095s for 81920 events => throughput is 6.88E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.19154487411607687) and cpp (0.19154487463123707) differ by less than 2E-4 (2.6895012883443314e-09) +OK! 
xsec from fortran (0.20930257969248323) and cpp (0.20930258019863174) differ by less than 2E-4 (2.4182622571089496e-09) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.283591e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.939493e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.319158e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.983445e+03 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -532,16 +532,16 @@ DEBUG: MEK processed 8192 events across 123 channels { 112 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.7806 [0.78063416917067197] fbridge_mode=1 - [UNWEIGHT] Wrote 10 events (found 192 events) - [COUNTERS] PROGRAM TOTAL : 0.7609s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6877s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0383s for 8192 events => throughput is 2.14E+05 events/s + [XSECTION] Cross section = 0.3314 [0.33144786533876569] fbridge_mode=1 + [UNWEIGHT] Wrote 7 events (found 213 events) + [COUNTERS] PROGRAM TOTAL : 0.8281s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7547s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0386s for 8192 events => throughput is 2.12E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0348s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.78063416981514677) and cuda (0.78063416917067197) differ by less than 2E-4 (8.255784944566358e-10) +OK! xsec from fortran (0.33144786561240197) and cuda (0.33144786533876569) differ by less than 2E-4 (8.255786054789382e-10) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -560,23 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 104 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttgg_x10_cudacpp > /tmp/avalassi/output_ggttgg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 123 channels { 112 : 90112 } +DEBUG: MEK processed 81920 events across 123 channels { 112 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 104 [XSECTION] ChannelId = 112 - [XSECTION] Cross section = 0.1915 [0.19154487447873400] fbridge_mode=1 - [UNWEIGHT] Wrote 27 events (found 312 events) - [COUNTERS] PROGRAM TOTAL : 2.6544s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2492s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3705s for 90112 events => throughput is 2.43E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0348s + [XSECTION] Cross section = 0.2093 [0.20930258003933860] fbridge_mode=1 + [UNWEIGHT] Wrote 17 events (found 331 events) + [COUNTERS] PROGRAM TOTAL : 2.9070s + [COUNTERS] Fortran Overhead ( 0 ) : 2.5436s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3289s for 81920 events => throughput is 2.49E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0345s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.19154487411607687) and cuda (0.19154487447873400) differ by less than 2E-4 (1.893327272739498e-09) +OK! xsec from fortran (0.20930257969248323) and cuda (0.20930258003933860) differ by less than 2E-4 (1.6571959360334176e-09) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.166594e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.146362e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.346522e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.341729e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.128597e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.115354e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.162506e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.164001e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.127115e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.119722e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.169117e+05 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 4.169184e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.130360e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.126511e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.417587e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.426812e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index 0cf7735a4c..22c56c63cf 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone - make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-09-02_07:15:29 +DATE: 2024-09-15_12:41:22 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.201e-06 [1.2009575548148470E-006] fbridge_mode=0 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 98.0853s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4849s - [COUNTERS] Fortran MEs ( 1 ) : 97.6004s for 8192 events => throughput is 8.39E+01 events/s + [COUNTERS] PROGRAM TOTAL : 102.2091s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5630s + [COUNTERS] Fortran MEs ( 1 ) : 101.6461s for 8192 events => throughput is 8.06E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.201e-06 [1.2009575548148470E-006] fbridge_mode=0 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 98.4132s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5077s - [COUNTERS] Fortran MEs ( 1 ) : 97.9055s for 8192 events => throughput is 8.37E+01 events/s + [COUNTERS] PROGRAM TOTAL : 102.3065s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5557s + [COUNTERS] Fortran MEs ( 1 ) : 101.7508s for 8192 events => throughput is 8.05E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' 
./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.264e-07 [2.2641741825130175E-007] fbridge_mode=0 - [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 1077.8633s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4153s - [COUNTERS] Fortran MEs ( 1 ) : 1073.4480s for 90112 events => throughput is 8.39E+01 events/s + [XSECTION] Cross section = 2.284e-07 [2.2842713115633741E-007] fbridge_mode=0 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 1015.5197s + [COUNTERS] Fortran Overhead ( 0 ) : 4.6072s + [COUNTERS] Fortran MEs ( 1 ) : 1010.9125s for 81920 events => throughput is 8.10E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,16 +132,16 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.201e-06 [1.2009575548148501E-006] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282475E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 121.6812s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4851s - [COUNTERS] CudaCpp MEs ( 2 ) : 120.9979s for 8192 events => throughput is 6.77E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1982s + [COUNTERS] PROGRAM TOTAL : 121.0029s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5478s + [COUNTERS] CudaCpp MEs ( 2 ) : 120.2529s for 8192 events => throughput is 6.81E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.2022s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2009575548148470E-006) and cpp (1.2009575548148501E-006) differ by less than 3E-14 (2.6645352591003757e-15) +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561551282475E-007) differ by less than 3E-14 (2.4424906541753444e-15) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,23 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.264e-07 [2.2641741825130207E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 1343.2729s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3800s - [COUNTERS] CudaCpp MEs ( 2 ) : 1338.6826s for 90112 events => throughput is 6.73E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.2103s + [XSECTION] Cross section = 2.284e-07 [2.2842713115633775E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 1237.7089s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5156s + [COUNTERS] CudaCpp MEs ( 2 ) : 1232.9854s for 81920 events => throughput is 6.64E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.2079s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2641741825130175E-007) and cpp (2.2641741825130207E-007) differ by less than 3E-14 (1.3322676295501878e-15) +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633775E-007) differ by less than 3E-14 (1.5543122344752192e-15) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.808964e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.635196e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.773682e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.680081e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -212,16 +212,16 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.201e-06 [1.2009575548148497E-006] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282467E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 60.4426s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4815s - [COUNTERS] CudaCpp MEs ( 2 ) : 59.8630s for 8192 events => throughput is 1.37E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0981s + [COUNTERS] PROGRAM TOTAL : 62.7563s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5430s + [COUNTERS] CudaCpp MEs ( 2 ) : 62.1127s for 8192 events => throughput is 1.32E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1006s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2009575548148470E-006) and cpp (1.2009575548148497E-006) differ by less than 3E-14 (2.220446049250313e-15) +OK! 
xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561551282467E-007) differ by less than 3E-14 (2.220446049250313e-15) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -240,23 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.264e-07 [2.2641741825130209E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 669.7982s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3750s - [COUNTERS] CudaCpp MEs ( 2 ) : 665.3245s for 90112 events => throughput is 1.35E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0987s + [XSECTION] Cross section = 2.284e-07 [2.2842713115633781E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 620.1444s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5224s + [COUNTERS] CudaCpp MEs ( 2 ) : 615.5209s for 81920 events => throughput is 1.33E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1011s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2641741825130175E-007) and cpp (2.2641741825130209E-007) differ by less than 3E-14 (1.5543122344752192e-15) +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633781E-007) differ by less than 3E-14 (1.7763568394002505e-15) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -265,12 +265,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.620968e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.588919e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.623378e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.592321e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -292,16 +292,16 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.201e-06 [1.2009575548148499E-006] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282467E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 28.8185s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4803s - [COUNTERS] CudaCpp MEs ( 2 ) : 28.2908s for 8192 events => throughput is 2.90E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0473s + [COUNTERS] PROGRAM TOTAL : 29.4160s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5466s + [COUNTERS] CudaCpp MEs ( 2 ) : 28.8206s for 8192 events => throughput is 2.84E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0487s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2009575548148470E-006) and cpp (1.2009575548148499E-006) differ by less than 3E-14 (2.4424906541753444e-15) +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561551282467E-007) differ by less than 3E-14 (2.220446049250313e-15) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -320,23 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.264e-07 [2.2641741825130220E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 316.9557s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3978s - [COUNTERS] CudaCpp MEs ( 2 ) : 312.5116s for 90112 events => throughput is 2.88E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0463s + [XSECTION] Cross section = 2.284e-07 [2.2842713115633781E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 297.3512s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5469s + [COUNTERS] CudaCpp MEs ( 2 ) : 292.7572s for 81920 events => throughput is 2.80E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0471s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (2.2641741825130175E-007) and cpp (2.2641741825130220E-007) differ by less than 3E-14 (1.9984014443252818e-15) +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633781E-007) differ by less than 3E-14 (1.7763568394002505e-15) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.474527e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.275338e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.428059e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.265860e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -372,16 +372,16 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.201e-06 [1.2009575548148499E-006] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282467E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 25.4499s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4870s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.9217s for 8192 events => throughput is 3.29E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0412s + [COUNTERS] PROGRAM TOTAL : 27.1464s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9932s + [COUNTERS] CudaCpp MEs ( 2 ) : 26.1100s for 8192 events => throughput is 3.14E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0432s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2009575548148470E-006) and cpp (1.2009575548148499E-006) differ by less than 3E-14 (2.4424906541753444e-15) +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561551282467E-007) differ by less than 3E-14 (2.220446049250313e-15) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -400,23 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.264e-07 [2.2641741825130220E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 278.1997s - [COUNTERS] Fortran Overhead ( 0 ) : 4.5246s - [COUNTERS] CudaCpp MEs ( 2 ) : 273.6350s for 90112 events => throughput is 3.29E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0401s + [XSECTION] Cross section = 2.284e-07 [2.2842713115633781E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 264.6969s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5790s + [COUNTERS] CudaCpp MEs ( 2 ) : 260.0758s for 81920 events => throughput is 3.15E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0421s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2641741825130175E-007) and cpp (2.2641741825130220E-007) differ by less than 3E-14 (1.9984014443252818e-15) +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633781E-007) differ by less than 3E-14 (1.7763568394002505e-15) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.008315e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.866714e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.020269e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.924843e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -452,16 +452,16 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.201e-06 [1.2009575548148499E-006] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282467E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 25.4182s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4828s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.8908s for 8192 events => throughput is 3.29E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0447s + [COUNTERS] PROGRAM TOTAL : 26.5115s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9718s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.4924s for 8192 events => throughput is 3.21E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0472s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2009575548148470E-006) and cpp (1.2009575548148499E-006) differ by less than 3E-14 (2.4424906541753444e-15) +OK! 
xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561551282467E-007) differ by less than 3E-14 (2.220446049250313e-15) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -480,23 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.264e-07 [2.2641741825130220E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 279.7174s - [COUNTERS] Fortran Overhead ( 0 ) : 4.3551s - [COUNTERS] CudaCpp MEs ( 2 ) : 275.3169s for 90112 events => throughput is 3.27E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0454s + [XSECTION] Cross section = 2.284e-07 [2.2842713115633781E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 259.7252s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4963s + [COUNTERS] CudaCpp MEs ( 2 ) : 255.1820s for 81920 events => throughput is 3.21E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0469s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2641741825130175E-007) and cpp (2.2641741825130220E-007) differ by less than 3E-14 (1.9984014443252818e-15) +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713115633781E-007) differ by less than 3E-14 (1.7763568394002505e-15) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -505,12 +505,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.540558e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.450525e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.595223e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.456130e+02 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -532,16 +532,16 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.201e-06 [1.2009575548148501E-006] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282475E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 3.1566s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9705s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.1003s for 8192 events => throughput is 7.45E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 1.0858s + [COUNTERS] PROGRAM TOTAL : 3.2899s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0907s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.1026s for 8192 events => throughput is 7.43E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 1.0966s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2009575548148470E-006) and cuda (1.2009575548148501E-006) differ by less than 3E-14 (2.6645352591003757e-15) +OK! xsec from fortran (2.3572561551282417E-007) and cuda (2.3572561551282475E-007) differ by less than 3E-14 (2.4424906541753444e-15) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -560,23 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.264e-07 [2.2641741825130222E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 17.8702s - [COUNTERS] Fortran Overhead ( 0 ) : 4.8335s - [COUNTERS] CudaCpp MEs ( 2 ) : 11.9487s for 90112 events => throughput is 7.54E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 1.0880s + [XSECTION] Cross section = 2.284e-07 [2.2842713115633791E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 16.9895s + [COUNTERS] Fortran Overhead ( 0 ) : 5.0379s + [COUNTERS] CudaCpp MEs ( 2 ) : 10.8621s for 81920 events => throughput is 7.54E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 1.0895s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (2.2641741825130175E-007) and cuda (2.2641741825130222E-007) differ by less than 3E-14 (1.9984014443252818e-15) +OK! xsec from fortran (2.2842713115633741E-007) and cuda (2.2842713115633791E-007) differ by less than 3E-14 (2.220446049250313e-15) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.494172e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.484119e+03 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.256504e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.258297e+03 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.274646e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.228572e+03 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.540338e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.575774e+03 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.264214e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.255424e+03 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.441628e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.453521e+03 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.277309e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.234529e+03 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.246710e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.253800e+03 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt index 559911b6ff..ecfc5d7b2f 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg - +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
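As an illustrative aside (not part of the logs themselves): the "OK! xsec from fortran (...) and cpp/cuda (...) differ by less than <tol>" lines throughout these logs compare the two cross sections via their relative difference, against a tolerance that tracks the floating-point mode of the build (3E-14 in the double-precision _d log above, 4E-4 in the single-precision _f log, 2E-4 in the mixed-precision _m log). A minimal Python sketch of such a check, with values copied from the double-precision log above; the rel_diff helper and its exact normalisation are assumptions for illustration, not the actual tmad comparison code:

# Illustrative sketch (assumed helper, not the actual tmad script):
# reproduce an "OK! xsec ... differ by less than <tol>" line.
def rel_diff(a: float, b: float) -> float:
    # relative difference of two cross sections (normalisation assumed)
    return abs(a - b) / max(abs(a), abs(b))

xsec_fortran = 2.3572561551282417e-07  # MADEVENT_FORTRAN x1, from the log above
xsec_cuda    = 2.3572561551282475e-07  # MADEVENT_CUDA x1, from the log above
tol = 3e-14                            # double-precision tolerance in these logs
delta = rel_diff(xsec_fortran, xsec_cuda)
assert delta < tol
print(f"OK! xsec from fortran ({xsec_fortran}) and cuda ({xsec_cuda}) "
      f"differ by less than {tol:.0E} ({delta})")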
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-09-02_08:37:44 +DATE: 2024-09-15_13:59:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.201e-06 [1.2009575548148470E-006] fbridge_mode=0 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 98.3589s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4847s - [COUNTERS] Fortran MEs ( 1 ) : 97.8742s for 8192 events => throughput is 8.37E+01 events/s + [COUNTERS] PROGRAM TOTAL : 101.5343s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5565s + [COUNTERS] Fortran MEs ( 1 ) : 100.9778s for 8192 events => throughput is 8.11E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.201e-06 [1.2009575548148470E-006] fbridge_mode=0 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 97.9037s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4807s - [COUNTERS] Fortran MEs ( 1 ) : 97.4230s for 8192 events => throughput is 8.41E+01 events/s + [COUNTERS] PROGRAM TOTAL : 101.8885s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5530s + [COUNTERS] Fortran MEs ( 1 ) : 101.3354s for 8192 events => throughput is 8.08E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.264e-07 [2.2641741825130175E-007] fbridge_mode=0 - [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 1108.9567s - [COUNTERS] Fortran Overhead ( 0 ) : 4.5470s - [COUNTERS] Fortran MEs ( 1 ) : 1104.4097s for 90112 events => throughput is 8.16E+01 events/s + [XSECTION] Cross section = 2.284e-07 [2.2842713115633741E-007] fbridge_mode=0 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 1019.5275s + [COUNTERS] Fortran Overhead ( 0 ) : 4.6107s + [COUNTERS] Fortran MEs ( 1 ) : 1014.9167s for 81920 events => throughput is 8.07E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** 
-------------------- @@ -133,16 +133,16 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.201e-06 [1.2011250641073541E-006] fbridge_mode=1 + [XSECTION] Cross section = 2.358e-07 [2.3575849446922190E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 113.8614s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4939s - [COUNTERS] CudaCpp MEs ( 2 ) : 113.1804s for 8192 events => throughput is 7.24E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1871s + [COUNTERS] PROGRAM TOTAL : 113.9851s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5716s + [COUNTERS] CudaCpp MEs ( 2 ) : 113.2257s for 8192 events => throughput is 7.24E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1878s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2009575548148470E-006) and cpp (1.2011250641073541E-006) differ by less than 4E-4 (0.000139479777478968) +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3575849446922190E-007) differ by less than 4E-4 (0.00013947977747852391) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -162,23 +162,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.264e-07 [2.2644964998437398E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 1254.3527s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4844s - [COUNTERS] CudaCpp MEs ( 2 ) : 1249.6808s for 90112 events => throughput is 7.21E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1875s + [XSECTION] Cross section = 2.285e-07 [2.2845954405861011E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 1127.1766s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5299s + [COUNTERS] CudaCpp MEs ( 2 ) : 1122.4594s for 81920 events => throughput is 7.30E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1874s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2641741825130175E-007) and cpp (2.2644964998437398E-007) differ by less than 4E-4 (0.0001423553599417815) +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2845954405861011E-007) differ by less than 4E-4 (0.00014189602657355138) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -187,12 +187,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.538641e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.606058e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.488951e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.616772e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -215,16 +215,16 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.201e-06 [1.2011248466338516E-006] fbridge_mode=1 + [XSECTION] Cross section = 2.358e-07 [2.3575845178322101E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 28.3507s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4980s - [COUNTERS] CudaCpp MEs ( 2 ) : 27.8049s for 8192 events => throughput is 2.95E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0478s + [COUNTERS] PROGRAM TOTAL : 28.4233s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5609s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.8152s for 8192 events => throughput is 2.95E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0471s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2009575548148470E-006) and cpp (1.2011248466338516E-006) differ by less than 4E-4 (0.00013929869405782114) +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3575845178322101E-007) differ by less than 4E-4 (0.0001392986940575991) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -244,23 +244,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.264e-07 [2.2644960006557758E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 310.5929s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4914s - [COUNTERS] CudaCpp MEs ( 2 ) : 306.0526s for 90112 events => throughput is 2.94E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0488s + [XSECTION] Cross section = 2.285e-07 [2.2845949484525033E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 283.7955s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5114s + [COUNTERS] CudaCpp MEs ( 2 ) : 279.2380s for 81920 events => throughput is 2.93E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0461s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2641741825130175E-007) and cpp (2.2644960006557758E-007) differ by less than 4E-4 (0.00014213488752057302) +OK! 
xsec from fortran (2.2842713115633741E-007) and cpp (2.2845949484525033E-007) differ by less than 4E-4 (0.00014168058211416756) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -269,12 +269,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.372253e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.393625e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.379357e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.397500e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -297,16 +297,16 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.201e-06 [1.2011248461798598E-006] fbridge_mode=1 + [XSECTION] Cross section = 2.358e-07 [2.3575845169411084E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 14.8725s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4938s - [COUNTERS] CudaCpp MEs ( 2 ) : 14.3547s for 8192 events => throughput is 5.71E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0240s + [COUNTERS] PROGRAM TOTAL : 14.8014s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5481s + [COUNTERS] CudaCpp MEs ( 2 ) : 14.2298s for 8192 events => throughput is 5.76E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0235s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2009575548148470E-006) and cpp (1.2011248461798598E-006) differ by less than 4E-4 (0.0001392983160330985) +OK! 
xsec from fortran (2.3572561551282417E-007) and cpp (2.3575845169411084E-007) differ by less than 4E-4 (0.0001392983160326544) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -326,23 +326,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.264e-07 [2.2644951222621394E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 162.3269s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4878s - [COUNTERS] CudaCpp MEs ( 2 ) : 157.8145s for 90112 events => throughput is 5.71E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0246s + [XSECTION] Cross section = 2.285e-07 [2.2845940747287339E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 148.1207s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4972s + [COUNTERS] CudaCpp MEs ( 2 ) : 143.5998s for 81920 events => throughput is 5.70E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0237s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2641741825130175E-007) and cpp (2.2644951222621394E-007) differ by less than 4E-4 (0.000141746934313014) +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2845940747287339E-007) differ by less than 4E-4 (0.0001412980864952118) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -351,12 +351,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.811808e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.850524e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.750586e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.880069e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -379,16 +379,16 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.201e-06 [1.2011248461798598E-006] fbridge_mode=1 + [XSECTION] Cross section = 2.358e-07 [2.3575845169411084E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 13.3225s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4965s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.8043s for 8192 events => throughput is 6.40E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0217s + [COUNTERS] PROGRAM TOTAL : 13.6509s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5589s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.0709s for 8192 events => throughput is 6.27E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0212s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2009575548148470E-006) and cpp (1.2011248461798598E-006) differ by less than 4E-4 (0.0001392983160330985) +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3575845169411084E-007) differ by less than 4E-4 (0.0001392983160326544) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -408,23 +408,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.264e-07 [2.2644951222621394E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 146.7771s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4866s - [COUNTERS] CudaCpp MEs ( 2 ) : 142.2688s for 90112 events => throughput is 6.33E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0217s + [XSECTION] Cross section = 2.285e-07 [2.2845940747287339E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 135.4503s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5206s + [COUNTERS] CudaCpp MEs ( 2 ) : 130.9090s for 81920 events => throughput is 6.26E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0207s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2641741825130175E-007) and cpp (2.2644951222621394E-007) differ by less than 4E-4 (0.000141746934313014) +OK! 
xsec from fortran (2.2842713115633741E-007) and cpp (2.2845940747287339E-007) differ by less than 4E-4 (0.0001412980864952118) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -433,12 +433,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.705366e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.634471e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.670587e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.589322e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -461,16 +461,16 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.201e-06 [1.2011251360912330E-006] fbridge_mode=1 + [XSECTION] Cross section = 2.358e-07 [2.3575850859831750E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 13.4800s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4932s - [COUNTERS] CudaCpp MEs ( 2 ) : 12.9621s for 8192 events => throughput is 6.32E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0247s + [COUNTERS] PROGRAM TOTAL : 13.3326s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5808s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.7254s for 8192 events => throughput is 6.44E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0265s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2009575548148470E-006) and cpp (1.2011251360912330E-006) differ by less than 4E-4 (0.00013953971621583072) +OK! 
xsec from fortran (2.3572561551282417E-007) and cpp (2.3575850859831750E-007) differ by less than 4E-4 (0.00013953971621538663) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -490,23 +490,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.264e-07 [2.2644957106171463E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 145.7072s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4846s - [COUNTERS] CudaCpp MEs ( 2 ) : 141.1985s for 90112 events => throughput is 6.38E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0241s + [XSECTION] Cross section = 2.285e-07 [2.2845946568145136E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 131.7116s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5046s + [COUNTERS] CudaCpp MEs ( 2 ) : 127.1838s for 81920 events => throughput is 6.44E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0233s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2641741825130175E-007) and cpp (2.2644957106171463E-007) differ by less than 4E-4 (0.00014200678844056291) +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2845946568145136E-007) differ by less than 4E-4 (0.00014155290989403824) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -515,12 +515,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.841319e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.895916e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.788003e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.878269e+02 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -542,16 +542,16 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.201e-06 [1.2011257191623754E-006] fbridge_mode=1 + [XSECTION] Cross section = 2.358e-07 [2.3575862304433055E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 2.0804s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9858s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5414s for 8192 events => throughput is 1.51E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.5532s + [COUNTERS] PROGRAM TOTAL : 2.1535s + [COUNTERS] Fortran Overhead ( 0 ) : 1.0557s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5385s for 8192 events => throughput is 1.52E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.5593s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2009575548148470E-006) and cuda (1.2011257191623754E-006) differ by less than 4E-4 (0.00014002522141964846) +OK! xsec from fortran (2.3572561551282417E-007) and cuda (2.3575862304433055E-007) differ by less than 4E-4 (0.00014002522141920437) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -570,23 +570,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.264e-07 [2.2644969729873264E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 11.3529s - [COUNTERS] Fortran Overhead ( 0 ) : 4.9667s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.8330s for 90112 events => throughput is 1.54E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.5531s + [XSECTION] Cross section = 2.285e-07 [2.2845959888250639E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 10.9212s + [COUNTERS] Fortran Overhead ( 0 ) : 5.0072s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.3407s for 81920 events => throughput is 1.53E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.5732s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (2.2641741825130175E-007) and cuda (2.2644969729873264E-007) differ by less than 4E-4 (0.0001425643295476231) +OK! xsec from fortran (2.2842713115633741E-007) and cuda (2.2845959888250639E-007) differ by less than 4E-4 (0.0001421360326359089) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -595,42 +595,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.531202e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.542523e+04 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.528697e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.546779e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.145335e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.146798e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.156492e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.138507e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.149709e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.102140e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.142704e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.148371e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.153881e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.144042e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.001184e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.992297e+03 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 2b83738bd0..f052f133a7 100644 --- a/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone - +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' + +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. 
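For orientation, the throughput quoted in each [COUNTERS] line is plain arithmetic: the number of events processed divided by the time spent in that section, printed with two significant digits in scientific notation. A small sketch reproducing one figure from the single-precision CUDA x10 run above:

# Illustrative sketch: reproduce a [COUNTERS] throughput figure from the logs.
nevt = 81920    # events in an x10 run (81920 = 10 * 8192)
secs = 5.3407   # "CudaCpp MEs" time of the cuda_f x10 run above
print(f"{secs}s for {nevt} events => throughput is {nevt / secs:.2E} events/s")
# prints: 5.3407s for 81920 events => throughput is 1.53E+04 events/s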
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg' OMP_NUM_THREADS= -DATE: 2024-09-02_09:42:04 +DATE: 2024-09-15_14:59:11 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.201e-06 [1.2009575548148470E-006] fbridge_mode=0 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 100.8249s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4958s - [COUNTERS] Fortran MEs ( 1 ) : 100.3291s for 8192 events => throughput is 8.17E+01 events/s + [COUNTERS] PROGRAM TOTAL : 101.7559s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5531s + [COUNTERS] Fortran MEs ( 1 ) : 101.2028s for 8192 events => throughput is 8.09E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x1_fortran > /tmp/a [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.201e-06 [1.2009575548148470E-006] fbridge_mode=0 + [XSECTION] Cross section = 2.357e-07 [2.3572561551282417E-007] fbridge_mode=0 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 100.7299s - [COUNTERS] Fortran Overhead ( 0 ) : 0.5005s - [COUNTERS] Fortran MEs ( 1 ) : 100.2294s for 8192 events => throughput is 8.17E+01 events/s + [COUNTERS] PROGRAM TOTAL : 102.2220s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5595s + [COUNTERS] Fortran MEs ( 1 ) : 101.6626s for 8192 events => throughput is 8.06E+01 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_ggttggg_x10_fortran > /tmp/ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.264e-07 [2.2641741825130175E-007] fbridge_mode=0 - [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 1108.8237s - [COUNTERS] Fortran Overhead ( 0 ) : 4.5214s - [COUNTERS] Fortran MEs ( 1 ) : 1104.3024s for 90112 events => throughput is 8.16E+01 events/s + [XSECTION] Cross section = 2.284e-07 [2.2842713115633741E-007] fbridge_mode=0 + [UNWEIGHT] Wrote 380 
events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 1016.2987s + [COUNTERS] Fortran Overhead ( 0 ) : 4.6111s + [COUNTERS] Fortran MEs ( 1 ) : 1011.6876s for 81920 events => throughput is 8.10E+01 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,16 +132,16 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.201e-06 [1.2009575613215040E-006] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572561678995975E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 128.8019s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4954s - [COUNTERS] CudaCpp MEs ( 2 ) : 128.0916s for 8192 events => throughput is 6.40E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.2149s + [COUNTERS] PROGRAM TOTAL : 121.8737s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5730s + [COUNTERS] CudaCpp MEs ( 2 ) : 121.0897s for 8192 events => throughput is 6.77E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.2110s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2009575548148470E-006) and cpp (1.2009575613215040E-006) differ by less than 2E-4 (5.417890802661418e-09) +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561678995975E-007) differ by less than 2E-4 (5.417890580616813e-09) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,23 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.264e-07 [2.2641741947481977E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 1433.7153s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4970s - [COUNTERS] CudaCpp MEs ( 2 ) : 1429.0044s for 90112 events => throughput is 6.31E+01 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.2140s + [XSECTION] Cross section = 2.284e-07 [2.2842713238614534E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 1278.2990s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5229s + [COUNTERS] CudaCpp MEs ( 2 ) : 1273.5682s for 81920 events => throughput is 6.43E+01 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.2078s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2641741825130175E-007) and cpp (2.2641741947481977E-007) differ by less than 2E-4 (5.40381583924443e-09) +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713238614534E-007) differ by less than 2E-4 (5.38380851011766e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -185,12 +185,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.483669e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.297738e+01 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.450709e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.358504e+01 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -212,16 +212,16 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.201e-06 [1.2009575624556593E-006] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572561701257335E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 64.0631s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4951s - [COUNTERS] CudaCpp MEs ( 2 ) : 63.4631s for 8192 events => throughput is 1.29E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1049s + [COUNTERS] PROGRAM TOTAL : 67.0430s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5843s + [COUNTERS] CudaCpp MEs ( 2 ) : 66.3507s for 8192 events => throughput is 1.23E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1079s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2009575548148470E-006) and cpp (1.2009575624556593E-006) differ by less than 2E-4 (6.3622667134666244e-09) +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561701257335E-007) differ by less than 2E-4 (6.3622664914220195e-09) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -240,23 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.264e-07 [2.2641741950090396E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 691.6461s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4363s - [COUNTERS] CudaCpp MEs ( 2 ) : 687.1049s for 90112 events => throughput is 1.31E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.1049s + [XSECTION] Cross section = 2.284e-07 [2.2842713242471448E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 651.0804s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5257s + [COUNTERS] CudaCpp MEs ( 2 ) : 646.4467s for 81920 events => throughput is 1.27E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.1080s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (2.2641741825130175E-007) and cpp (2.2641741950090396E-007) differ by less than 2E-4 (5.519019907751499e-09) +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713242471448E-007) differ by less than 2E-4 (5.552655002460938e-09) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.547673e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.537162e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.575361e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.537881e+02 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -292,16 +292,16 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.201e-06 [1.2009575626927521E-006] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572561705911026E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 28.1802s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4830s - [COUNTERS] CudaCpp MEs ( 2 ) : 27.6511s for 8192 events => throughput is 2.96E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0461s + [COUNTERS] PROGRAM TOTAL : 28.5211s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5700s + [COUNTERS] CudaCpp MEs ( 2 ) : 27.9053s for 8192 events => throughput is 2.94E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0458s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2009575548148470E-006) and cpp (1.2009575626927521E-006) differ by less than 2E-4 (6.55968657170547e-09) +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561705911026E-007) differ by less than 2E-4 (6.559686349660865e-09) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -320,23 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.264e-07 [2.2641741946798811E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 312.1970s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4214s - [COUNTERS] CudaCpp MEs ( 2 ) : 307.7313s for 90112 events => throughput is 2.93E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0444s + [XSECTION] Cross section = 2.284e-07 [2.2842713241239113E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 283.4125s + [COUNTERS] Fortran Overhead ( 0 ) : 4.4921s + [COUNTERS] CudaCpp MEs ( 2 ) : 278.8756s for 81920 events => throughput is 2.94E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0448s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2641741825130175E-007) and cpp (2.2641741946798811E-007) differ by less than 2E-4 (5.3736428640149825e-09) +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713241239113E-007) differ by less than 2E-4 (5.498706379114537e-09) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.529434e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.574406e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.534956e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.562894e+02 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -372,16 +372,16 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.201e-06 [1.2009575626927521E-006] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572561705911026E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 25.0518s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4954s - [COUNTERS] CudaCpp MEs ( 2 ) : 24.5169s for 8192 events => throughput is 3.34E+02 events/s + [COUNTERS] PROGRAM TOTAL : 24.8698s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5658s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.2646s for 8192 events => throughput is 3.38E+02 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0395s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2009575548148470E-006) and cpp (1.2009575626927521E-006) differ by less than 2E-4 (6.55968657170547e-09) +OK! 
xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561705911026E-007) differ by less than 2E-4 (6.559686349660865e-09) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -400,23 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.264e-07 [2.2641741946798811E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 274.0316s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4537s - [COUNTERS] CudaCpp MEs ( 2 ) : 269.5401s for 90112 events => throughput is 3.34E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0378s + [XSECTION] Cross section = 2.284e-07 [2.2842713241239113E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 248.6338s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5381s + [COUNTERS] CudaCpp MEs ( 2 ) : 244.0566s for 81920 events => throughput is 3.36E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0391s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2641741825130175E-007) and cpp (2.2641741946798811E-007) differ by less than 2E-4 (5.3736428640149825e-09) +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713241239113E-007) differ by less than 2E-4 (5.498706379114537e-09) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -425,12 +425,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.093052e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.158059e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.105741e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.143650e+02 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -452,16 +452,16 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.201e-06 [1.2009575626927521E-006] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572561705911026E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 26.5352s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4935s - [COUNTERS] CudaCpp MEs ( 2 ) : 25.9943s for 8192 events => throughput is 3.15E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0474s + [COUNTERS] PROGRAM TOTAL : 26.3376s + [COUNTERS] Fortran Overhead ( 0 ) : 0.5632s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.7273s for 8192 events => throughput is 3.18E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0471s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2009575548148470E-006) and cpp (1.2009575626927521E-006) differ by less than 2E-4 (6.55968657170547e-09) +OK! xsec from fortran (2.3572561551282417E-007) and cpp (2.3572561705911026E-007) differ by less than 2E-4 (6.559686349660865e-09) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -480,23 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.264e-07 [2.2641741946798811E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 288.1592s - [COUNTERS] Fortran Overhead ( 0 ) : 4.4946s - [COUNTERS] CudaCpp MEs ( 2 ) : 283.6177s for 90112 events => throughput is 3.18E+02 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0469s + [XSECTION] Cross section = 2.284e-07 [2.2842713241239113E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 261.5729s + [COUNTERS] Fortran Overhead ( 0 ) : 4.5211s + [COUNTERS] CudaCpp MEs ( 2 ) : 257.0034s for 81920 events => throughput is 3.19E+02 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0484s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (2.2641741825130175E-007) and cpp (2.2641741946798811E-007) differ by less than 2E-4 (5.3736428640149825e-09) +OK! xsec from fortran (2.2842713115633741E-007) and cpp (2.2842713241239113E-007) differ by less than 2E-4 (5.498706379114537e-09) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.498867e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.487285e+02 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.594525e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.483613e+02 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -532,16 +532,16 @@ DEBUG: MEK processed 8192 events across 1240 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 1.201e-06 [1.2009575531257951E-006] fbridge_mode=1 + [XSECTION] Cross section = 2.357e-07 [2.3572561518129465E-007] fbridge_mode=1 [UNWEIGHT] Wrote 18 events (found 285 events) - [COUNTERS] PROGRAM TOTAL : 2.7488s - [COUNTERS] Fortran Overhead ( 0 ) : 0.9990s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8781s for 8192 events => throughput is 9.33E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.8717s + [COUNTERS] PROGRAM TOTAL : 2.8854s + [COUNTERS] Fortran Overhead ( 0 ) : 1.1263s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.8776s for 8192 events => throughput is 9.33E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.8815s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (1.2009575548148470E-006) and cuda (1.2009575531257951E-006) differ by less than 2E-4 (1.4064209796771365e-09) +OK! xsec from fortran (2.3572561551282417E-007) and cuda (2.3572561518129465E-007) differ by less than 2E-4 (1.4064212017217415e-09) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -560,23 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_ggttggg_x10_cudacpp > /tmp/avalassi/output_ggttggg_x10_cudacpp' -DEBUG: MEK processed 90112 events across 1240 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 1240 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 128/128 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.264e-07 [2.2641741822510739E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 418 events (found 1570 events) - [COUNTERS] PROGRAM TOTAL : 15.2656s - [COUNTERS] Fortran Overhead ( 0 ) : 4.9039s - [COUNTERS] CudaCpp MEs ( 2 ) : 9.4902s for 90112 events => throughput is 9.50E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.8715s + [XSECTION] Cross section = 2.284e-07 [2.2842713109538129E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 380 events (found 1707 events) + [COUNTERS] PROGRAM TOTAL : 14.5688s + [COUNTERS] Fortran Overhead ( 0 ) : 5.0388s + [COUNTERS] CudaCpp MEs ( 2 ) : 8.6484s for 81920 events => throughput is 9.47E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.8816s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.2641741825130175E-007) and cuda (2.2641741822510739E-007) differ by less than 2E-4 (1.1569056823645951e-10) +OK! xsec from fortran (2.2842713115633741E-007) and cuda (2.2842713109538129E-007) differ by less than 2E-4 (2.668514298420632e-10) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.413169e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.426842e+03 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.072902e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.075881e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.107520e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.106694e+04 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 512 32 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.152995e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.154223e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.106323e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.105665e+04 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 128 128 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) 
= ( 1.103746e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.108609e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 --bridge *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.102767e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.106895e+04 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 2048 8 1 *** Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.685267e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.678273e+03 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt index 5909437ffc..ae3de12b01 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu - +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
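The GCHECK lines in the ggttggg log above tag each throughput scan with an event count. A minimal sketch of how those tags relate to the '-p' arguments, assuming the three values mean blocks, threads per block and iterations, so each iteration processes blocks*threads events (an assumption; the log itself does not spell this out):

    # Hedged reading of '-p b t i' as (blocks, threads per block, iterations);
    # events per iteration is then blocks * threads, matching the GCHECK tag.
    configs = {
        "GCHECK(8192)":      (256, 32, 1),
        "GCHECK(MAX)":       (512, 32, 1),
        "GCHECK(MAX128THR)": (128, 128, 1),
        "GCHECK(MAX8THR)":   (2048, 8, 1),
    }
    for tag, (blocks, threads, iters) in configs.items():
        print(tag, "->", blocks * threads, "events per iteration")
    # GCHECK(8192) -> 8192; the three MAX variants all launch 16384 threads
    # here, differing only in the threads-per-block split.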
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-09-02_07:14:02 +DATE: 2024-09-15_12:39:45 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2734 [0.27343385520600988] fbridge_mode=0 + [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 [UNWEIGHT] Wrote 506 events (found 1943 events) - [COUNTERS] PROGRAM TOTAL : 0.4764s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4060s - [COUNTERS] Fortran MEs ( 1 ) : 0.0704s for 8192 events => throughput is 1.16E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5503s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4781s + [COUNTERS] Fortran MEs ( 1 ) : 0.0722s for 8192 events => throughput is 1.14E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2734 [0.27343385520600988] fbridge_mode=0 - [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.4005s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3294s - [COUNTERS] Fortran MEs ( 1 ) : 0.0711s for 8192 events => throughput is 1.15E+05 events/s + [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.4980s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4263s + [COUNTERS] Fortran MEs ( 1 ) : 0.0717s for 8192 events => throughput is 1.14E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < 
/tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2118 [0.21182382983293388] fbridge_mode=0 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 2.2516s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4776s - [COUNTERS] Fortran MEs ( 1 ) : 0.7740s for 90112 events => throughput is 1.16E+05 events/s + [XSECTION] Cross section = 0.211 [0.21095842877427595] fbridge_mode=0 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.6426s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9227s + [COUNTERS] Fortran MEs ( 1 ) : 0.7199s for 81920 events => throughput is 1.14E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,16 +132,16 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2734 [0.27343385520600993] fbridge_mode=1 - [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.4135s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3362s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0765s for 8192 events => throughput is 1.07E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [XSECTION] Cross section = 0.2031 [0.20313504505737132] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.5037s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4265s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0766s for 8192 events => throughput is 1.07E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27343385520600988) and cpp (0.27343385520600993) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504505737132) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,23 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2118 [0.21182382983293388] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 2.3617s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5265s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8345s for 90112 events => throughput is 1.08E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [XSECTION] Cross section = 0.211 [0.21095842877427598] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.7326s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9466s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7853s for 81920 events => throughput is 1.04E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (0.21182382983293388) and cpp (0.21182382983293388) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427598) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.047847e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.069256e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.047146e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.071686e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -212,16 +212,16 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2734 [0.27343385520601043] fbridge_mode=1 - [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.3824s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3397s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0421s for 8192 events => throughput is 1.94E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [XSECTION] Cross section = 0.2031 [0.20313504505737170] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.4715s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4278s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0433s for 8192 events => throughput is 1.89E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27343385520600988) and cpp (0.27343385520601043) differ by less than 3E-14 (1.9984014443252818e-15) +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504505737170) differ by less than 3E-14 (2.220446049250313e-15) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -240,23 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
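Each 'Compare ... xsec' step above quotes the relative difference between the Fortran and cudacpp cross sections in parentheses. A minimal sketch that reproduces the quoted number, assuming the metric is |cpp/fortran - 1| (the actual comparison script may compute it differently; the values below are the (2-none) x10 pair from this log):

    # Hedged sketch of the xsec comparison, assuming delta = |cpp/fortran - 1|.
    xsec_fortran = 0.21095842877427595
    xsec_cpp     = 0.21095842877427598
    delta = abs(xsec_cpp / xsec_fortran - 1.0)
    assert delta < 3e-14  # tolerance quoted for the double-precision builds
    print(delta)          # 2.220446049250313e-16, as quoted in the log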
-------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2118 [0.21182382983293380] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 1.9947s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5274s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4667s for 90112 events => throughput is 1.93E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [XSECTION] Cross section = 0.211 [0.21095842877427590] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.3706s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9391s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4311s for 81920 events => throughput is 1.90E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21182382983293388) and cpp (0.21182382983293380) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427590) differ by less than 3E-14 (2.220446049250313e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.951130e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.920806e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.939973e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.914858e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -292,16 +292,16 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2734 [0.27343385520601038] fbridge_mode=1 - [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.3596s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3349s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0241s for 8192 events => throughput is 3.40E+05 events/s + [XSECTION] Cross section = 0.2031 [0.20313504505737162] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.4519s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4266s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0249s for 8192 events => throughput is 3.29E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27343385520600988) and cpp (0.27343385520601038) differ by less than 3E-14 (1.7763568394002505e-15) +OK! 
xsec from fortran (0.20313504505737126) and cpp (0.20313504505737162) differ by less than 3E-14 (1.7763568394002505e-15) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -320,23 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2118 [0.21182382983293385] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 1.7877s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5214s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2657s for 90112 events => throughput is 3.39E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [XSECTION] Cross section = 0.211 [0.21095842877427592] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.1855s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9345s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2505s for 81920 events => throughput is 3.27E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21182382983293388) and cpp (0.21182382983293385) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427592) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -345,12 +345,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.368351e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.315064e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.359295e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.353486e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -372,16 +372,16 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2734 [0.27343385520601038] fbridge_mode=1 - [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.3561s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3342s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0213s for 8192 events => throughput is 3.84E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [XSECTION] Cross section = 0.2031 [0.20313504505737162] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.4550s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4315s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0230s for 8192 events => throughput is 3.56E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27343385520600988) and cpp (0.27343385520601038) differ by less than 3E-14 (1.7763568394002505e-15) +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504505737162) differ by less than 3E-14 (1.7763568394002505e-15) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -400,23 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2118 [0.21182382983293385] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 1.7717s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5312s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2400s for 90112 events => throughput is 3.75E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [XSECTION] Cross section = 0.211 [0.21095842877427592] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.1824s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9556s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2263s for 81920 events => throughput is 3.62E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (0.21182382983293388) and cpp (0.21182382983293385) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427592) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.727346e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.736761e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.654587e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.761611e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -452,16 +452,16 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2734 [0.27343385520601038] fbridge_mode=1 - [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.3689s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3346s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0336s for 8192 events => throughput is 2.44E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [XSECTION] Cross section = 0.2031 [0.20313504505737162] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.4645s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4291s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0349s for 8192 events => throughput is 2.35E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27343385520600988) and cpp (0.27343385520601038) differ by less than 3E-14 (1.7763568394002505e-15) +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504505737162) differ by less than 3E-14 (1.7763568394002505e-15) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -480,23 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
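Throughout this diff the x10 runs' MEK event counts drop from 90112 to 81920. Both are exact multiples of the 8192-event batch size reported as [XSECTION] VECSIZE_USED, so a plausible reading (an assumption, not stated in the log) is that the x10 runs now execute ten rather than eleven 8192-event batches:

    # 90112 and 81920 as multiples of the 8192-event batch size.
    VECSIZE_USED = 8192
    print(11 * VECSIZE_USED)  # 90112, the x10 event count in the old logs
    print(10 * VECSIZE_USED)  # 81920, the x10 event count in the new logs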
-------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2118 [0.21182382983293385] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 1.8979s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5359s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3614s for 90112 events => throughput is 2.49E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [XSECTION] Cross section = 0.211 [0.21095842877427592] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.2885s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9460s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3420s for 81920 events => throughput is 2.40E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21182382983293388) and cpp (0.21182382983293385) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877427592) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.506558e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.383703e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.444740e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.334002e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -532,16 +532,16 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2734 [0.27343385520601049] fbridge_mode=1 - [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.7705s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7662s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.71E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s + [XSECTION] Cross section = 0.2031 [0.20313504505737173] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.8655s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8611s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.48E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27343385520600988) and cuda (0.27343385520601049) differ by less than 3E-14 (2.220446049250313e-15) +OK! 
xsec from fortran (0.20313504505737126) and cuda (0.20313504505737173) differ by less than 3E-14 (2.220446049250313e-15) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -560,23 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2118 [0.21182382983293385] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 1.9624s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9506s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0106s for 90112 events => throughput is 8.51E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s + [XSECTION] Cross section = 0.211 [0.21095842877427598] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.3721s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3607s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0103s for 81920 events => throughput is 7.97E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21182382983293388) and cuda (0.21182382983293385) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (0.21095842877427595) and cuda (0.21095842877427598) differ by less than 3E-14 (2.220446049250313e-16) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -585,42 +585,42 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.030039e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.721561e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.498510e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.065390e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.346135e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.156601e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.208610e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.991698e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.346446e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.159535e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.351501e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.254663e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.350079e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.182535e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.647846e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.650805e+07 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt index 7681a43fcc..c9aade28c7 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu - +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu make USEBUILDDIR=1 BACKEND=cuda + make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make USEBUILDDIR=1 BACKEND=cpp512y +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
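The file below is the single-precision variant (log_gqttq_mad_f_inl0_hrd0.txt) of the double-precision log above, and the xsec tolerance loosens accordingly, from 3E-14 to 4E-4. A short sketch of why those scales are natural for IEEE-754 arithmetic; the epsilon values are standard, while the tolerance choices themselves belong to the test:

    import sys
    # Double-precision machine epsilon: the smallest relative differences
    # quoted above, 2.220446049250313e-16, are exactly this one-ulp scale,
    # and 3E-14 allows roughly a hundred ulps of accumulated rounding.
    print(sys.float_info.epsilon)  # 2.220446049250313e-16
    # Single-precision epsilon is 2**-23 ~ 1.19e-07, so the O(1e-7..1e-8)
    # fortran-vs-cpp differences in the _f_ log below are expected and the
    # 4E-4 tolerance leaves generous headroom.
    print(2.0 ** -23)              # 1.1920928955078125e-07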
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-09-02_07:14:32 +DATE: 2024-09-15_12:40:18 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2734 [0.27343385520600988] fbridge_mode=0 + [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 [UNWEIGHT] Wrote 506 events (found 1943 events) - [COUNTERS] PROGRAM TOTAL : 0.4752s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4052s - [COUNTERS] Fortran MEs ( 1 ) : 0.0700s for 8192 events => throughput is 1.17E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5486s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4755s + [COUNTERS] Fortran MEs ( 1 ) : 0.0730s for 8192 events => throughput is 1.12E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2734 [0.27343385520600988] fbridge_mode=0 - [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.4005s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3300s - [COUNTERS] Fortran MEs ( 1 ) : 0.0705s for 8192 events => throughput is 1.16E+05 events/s + [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.4998s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4285s + [COUNTERS] Fortran MEs ( 1 ) : 0.0713s for 8192 events => throughput is 1.15E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2118 [0.21182382983293388] fbridge_mode=0 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 2.2559s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4804s - [COUNTERS] Fortran MEs ( 1 ) : 0.7755s for 90112 events => throughput is 1.16E+05 events/s + [XSECTION] Cross section = 0.211 [0.21095842877427595] fbridge_mode=0 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.6509s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9299s + [COUNTERS] Fortran MEs ( 1 ) : 0.7210s for 81920 events => throughput is 1.14E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,16 +132,16 @@ DEBUG: 
MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2734 [0.27343387711996092] fbridge_mode=1 - [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.4065s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3335s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0723s for 8192 events => throughput is 1.13E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [XSECTION] Cross section = 0.2031 [0.20313506133732837] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.5035s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4289s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0741s for 8192 events => throughput is 1.11E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27343385520600988) and cpp (0.27343387711996092) differ by less than 4E-4 (8.014351782215101e-08) +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313506133732837) differ by less than 4E-4 (8.014351782215101e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,23 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2118 [0.21182383177444153] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 2.3139s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5293s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.7840s for 90112 events => throughput is 1.15E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [XSECTION] Cross section = 0.211 [0.21095842907143103] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.6726s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9382s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7339s for 81920 events => throughput is 1.12E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21182382983293388) and cpp (0.21182383177444153) differ by less than 4E-4 (9.165671555066979e-09) +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842907143103) differ by less than 4E-4 (1.4085954624931674e-09) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -185,12 +185,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.144617e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.135662e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.152269e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.136549e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -212,16 +212,16 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2734 [0.27343383490650730] fbridge_mode=1 - [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.3584s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3319s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0260s for 8192 events => throughput is 3.15E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 0.2031 [0.20313502997679400] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.4543s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4272s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0268s for 8192 events => throughput is 3.06E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27343385520600988) and cpp (0.27343383490650730) differ by less than 4E-4 (7.423917047777451e-08) +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313502997679400) differ by less than 4E-4 (7.423917058879681e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -240,23 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2118 [0.21182379928139489] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 1.8149s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5264s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2880s for 90112 events => throughput is 3.13E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 0.211 [0.21095839656505114] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.2055s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9374s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2678s for 81920 events => throughput is 3.06E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21182382983293388) and cpp (0.21182379928139489) differ by less than 4E-4 (1.4423088756654323e-07) +OK! 
xsec from fortran (0.21095842877427595) and cpp (0.21095839656505114) differ by less than 4E-4 (1.5268043562777223e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.005069e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.039729e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.006782e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.985614e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -292,16 +292,16 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2734 [0.27343382982076725] fbridge_mode=1 - [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.3632s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3490s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0138s for 8192 events => throughput is 5.94E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 0.2031 [0.20313502619857851] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.4424s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4288s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0133s for 8192 events => throughput is 6.18E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27343385520600988) and cpp (0.27343382982076725) differ by less than 4E-4 (9.283869628617936e-08) +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313502619857851) differ by less than 4E-4 (9.283869628617936e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -320,23 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
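The [COUNTERS] throughput numbers are derived from the event count and the ME timer; a one-line check using the sse4 x10 figures from this log (81920 events in 0.2678s):

    # Throughput = events / ME time; numbers from the sse4 x10 counters above.
    events, me_seconds = 81920, 0.2678
    print(f"throughput is {events / me_seconds:.2E} events/s")  # 3.06E+05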
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2118 [0.21182379586387554] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 1.7147s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5632s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1511s for 90112 events => throughput is 5.96E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 0.211 [0.21095839412856376] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.0714s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9365s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1346s for 81920 events => throughput is 6.08E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21182382983293388) and cpp (0.21182379586387554) differ by less than 4E-4 (1.6036466898849966e-07) +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095839412856376) differ by less than 4E-4 (1.6423004467469582e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.215848e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.180640e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.190438e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.307549e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -372,16 +372,16 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2734 [0.27343382982076725] fbridge_mode=1 - [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.3458s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3334s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0120s for 8192 events => throughput is 6.85E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 0.2031 [0.20313502619857851] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.4399s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4274s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0123s for 8192 events => throughput is 6.68E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27343385520600988) and cpp (0.27343382982076725) differ by less than 4E-4 (9.283869628617936e-08) +OK! 
xsec from fortran (0.20313504505737126) and cpp (0.20313502619857851) differ by less than 4E-4 (9.283869628617936e-08) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -400,23 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2118 [0.21182379586387554] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 1.6562s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5235s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1324s for 90112 events => throughput is 6.81E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 0.211 [0.21095839412856376] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.0732s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9485s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1245s for 81920 events => throughput is 6.58E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21182382983293388) and cpp (0.21182379586387554) differ by less than 4E-4 (1.6036466898849966e-07) +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095839412856376) differ by less than 4E-4 (1.6423004467469582e-07) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -425,12 +425,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.618737e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.792178e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.707552e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.753332e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -452,16 +452,16 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2734 [0.27343386589929475] fbridge_mode=1 - [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.3500s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3324s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0171s for 8192 events => throughput is 4.80E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 0.2031 [0.20313505300145301] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.4441s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4268s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0170s for 8192 events => throughput is 4.83E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27343385520600988) and cpp (0.27343386589929475) differ by less than 4E-4 (3.9107391769377386e-08) +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313505300145301) differ by less than 4E-4 (3.910739154733278e-08) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -480,23 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2118 [0.21182382325497387] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 1.7111s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5292s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1814s for 90112 events => throughput is 4.97E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [XSECTION] Cross section = 0.211 [0.21095842133012335] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.1153s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9447s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1702s for 81920 events => throughput is 4.81E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21182382983293388) and cpp (0.21182382325497387) differ by less than 4E-4 (3.105391876978558e-08) +OK! 
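(Editor's aside: a minimal sketch of how the [COUNTERS] throughput figures are derived: events divided by the seconds spent in the matrix-element kernel. The printed seconds are rounded to four decimals, so recomputing from them reproduces the reported "events/s" only to about three significant figures. The numbers below are taken from the (2-512z) x10 run above.)

def throughput(n_events, seconds):
    # Events per second for one kernel timing, as in the COUNTERS lines.
    return n_events / seconds

print(f"{throughput(81920, 0.1702):.2E}")  # 4.81E+05 events/s, as reported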
xsec from fortran (0.21095842877427595) and cpp (0.21095842133012335) differ by less than 4E-4 (3.528729641821826e-08) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.952119e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.847714e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.787120e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.878988e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -532,16 +532,16 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2734 [0.27343391019497171] fbridge_mode=1 - [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.7743s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7705s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0029s for 8192 events => throughput is 2.83E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s + [XSECTION] Cross section = 0.2031 [0.20313508590887899] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.8653s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8613s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.60E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27343385520600988) and cuda (0.27343391019497171) differ by less than 4E-4 (2.011051696282351e-07) +OK! xsec from fortran (0.20313504505737126) and cuda (0.20313508590887899) differ by less than 4E-4 (2.011051698502797e-07) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -560,23 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
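(Editor's aside: a minimal sketch of what the "-p <blocks> <threads> <iterations>" arguments of the CHECK/GCHECK lines below seem to imply for the number of matrix elements evaluated per iteration. This interpretation is an assumption, but it is consistent with the grid sizes in this log: 256*32 = 8192, and all three "MAX" configurations evaluate the same 524288 events with different thread counts.)

def events_per_iteration(blocks, threads):
    # One matrix element per GPU thread per iteration (assumed).
    return blocks * threads

for blocks, threads in [(256, 32), (16384, 32), (4096, 128), (65536, 8)]:
    print(blocks, threads, events_per_iteration(blocks, threads))
# -> 8192, 524288, 524288, 524288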
-------------------- Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2118 [0.21182386711435958] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 1.9653s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9551s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0093s for 90112 events => throughput is 9.69E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s + [XSECTION] Cross section = 0.211 [0.21095846337765808] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.3785s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3688s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0089s for 81920 events => throughput is 9.25E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21182382983293388) and cuda (0.21182386711435958) differ by less than 4E-4 (1.7600203783274537e-07) +OK! xsec from fortran (0.21095842877427595) and cuda (0.21095846337765808) differ by less than 4E-4 (1.640293887383848e-07) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.122207e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.041241e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.428061e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.251569e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.203636e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.842862e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.653874e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.410741e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.231865e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.813185e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.767629e+07 ) sec^-1 
+EvtsPerSec[MECalcOnly] (3a) = ( 9.550006e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.821899e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.522713e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.239707e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.198390e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index 22704e3e7a..e0b5569f21 100644 --- a/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,4 +1,4 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu make USEBUILDDIR=1 BACKEND=cuda @@ -7,35 +7,35 @@ make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make USEBUILDDIR=1 BACKEND=cpp512y make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu' OMP_NUM_THREADS= -DATE: 2024-09-02_07:14:59 +DATE: 2024-09-15_12:40:49 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gu_ttxu *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2734 [0.27343385520600988] fbridge_mode=0 + [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 [UNWEIGHT] Wrote 506 events (found 1943 events) - [COUNTERS] PROGRAM TOTAL : 0.4903s - [COUNTERS] Fortran Overhead ( 0 ) : 0.4187s - [COUNTERS] Fortran MEs ( 1 ) : 0.0716s for 8192 events => throughput is 1.14E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.5511s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4774s + [COUNTERS] Fortran MEs ( 1 ) : 0.0737s for 8192 events => throughput is 1.11E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_gqttq_x1_fortran > /tmp/ava [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2734 [0.27343385520600988] fbridge_mode=0 - [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.4016s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3303s - [COUNTERS] Fortran MEs ( 1 ) : 0.0713s for 8192 events => throughput is 1.15E+05 events/s + [XSECTION] Cross section = 0.2031 [0.20313504505737126] fbridge_mode=0 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.5034s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4306s + [COUNTERS] Fortran MEs ( 1 ) : 0.0728s for 8192 events => throughput is 1.13E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < 
/tmp/avalassi/input_gqttq_x10_fortran > /tmp/av [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2118 [0.21182382983293388] fbridge_mode=0 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 2.2740s - [COUNTERS] Fortran Overhead ( 0 ) : 1.4925s - [COUNTERS] Fortran MEs ( 1 ) : 0.7816s for 90112 events => throughput is 1.15E+05 events/s + [XSECTION] Cross section = 0.211 [0.21095842877427595] fbridge_mode=0 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.6463s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9275s + [COUNTERS] Fortran MEs ( 1 ) : 0.7189s for 81920 events => throughput is 1.14E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,16 +132,16 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2734 [0.27343385506612239] fbridge_mode=1 - [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.4109s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3350s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0750s for 8192 events => throughput is 1.09E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [XSECTION] Cross section = 0.2031 [0.20313504495344831] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.5152s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4367s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0774s for 8192 events => throughput is 1.06E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27343385520600988) and cpp (0.27343385506612239) differ by less than 2E-4 (5.115953216616731e-10) +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504495344831) differ by less than 2E-4 (5.115954326839756e-10) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,23 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2118 [0.21182382982924081] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 2.3489s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5147s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.8334s for 90112 events => throughput is 1.08E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [XSECTION] Cross section = 0.211 [0.21095842877343590] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.7101s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9336s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7758s for 81920 events => throughput is 1.06E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (0.21182382983293388) and cpp (0.21182382982924081) differ by less than 2E-4 (1.743460931180607e-11) +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877343590) differ by less than 2E-4 (3.982036922423049e-12) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.093280e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.061169e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.058279e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.068944e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -212,16 +212,16 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2734 [0.27343385506612239] fbridge_mode=1 - [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.3819s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3400s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0413s for 8192 events => throughput is 1.98E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [XSECTION] Cross section = 0.2031 [0.20313504495344833] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.4707s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4276s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0426s for 8192 events => throughput is 1.92E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27343385520600988) and cpp (0.27343385506612239) differ by less than 2E-4 (5.115953216616731e-10) +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504495344833) differ by less than 2E-4 (5.115952106393706e-10) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -240,23 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
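(Editor's aside: the xsec tolerance tracks the floating-point precision of the build, as visible in this diff: the *_f_* (float) gqttq log above uses 4E-4, this *_m_* (mixed) log uses 2E-4, and the *_d_* (double) heftggbb log further down uses 3E-14. A hypothetical lookup capturing just that observed mapping, keyed on the build directory names such as build.sse4_m_inl0_hrd0:)

XSEC_TOLERANCE = {"f": 4e-4, "m": 2e-4, "d": 3e-14}

def tolerance_for(build_dir):
    # e.g. "build.sse4_m_inl0_hrd0" -> fptype "m" -> 2e-4
    fptype = build_dir.split(".")[1].split("_")[1]
    return XSEC_TOLERANCE[fptype]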
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2118 [0.21182382982924075] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 1.9884s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5305s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4573s for 90112 events => throughput is 1.97E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [XSECTION] Cross section = 0.211 [0.21095842877343590] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.3778s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9488s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4286s for 81920 events => throughput is 1.91E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21182382983293388) and cpp (0.21182382982924075) differ by less than 2E-4 (1.7434942378713458e-11) +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842877343590) differ by less than 2E-4 (3.982036922423049e-12) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.933401e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.907975e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.983497e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.932977e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -292,16 +292,16 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2734 [0.27343385527282038] fbridge_mode=1 - [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.3703s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3442s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0254s for 8192 events => throughput is 3.22E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [XSECTION] Cross section = 0.2031 [0.20313504510700500] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.4572s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4311s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0257s for 8192 events => throughput is 3.19E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27343385520600988) and cpp (0.27343385527282038) differ by less than 2E-4 (2.4433877143792415e-10) +OK! 
xsec from fortran (0.20313504505737126) and cpp (0.20313504510700500) differ by less than 2E-4 (2.4433854939331923e-10) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -320,23 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2118 [0.21182382979024772] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 1.7997s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5351s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2641s for 90112 events => throughput is 3.41E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [XSECTION] Cross section = 0.211 [0.21095842875361914] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.1817s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9356s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2456s for 81920 events => throughput is 3.34E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21182382983293388) and cpp (0.21182382979024772) differ by less than 2E-4 (2.0151724733352694e-10) +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842875361914) differ by less than 2E-4 (9.791889521437724e-11) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -345,12 +345,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.395360e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.338992e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.412066e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.300070e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -372,16 +372,16 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2734 [0.27343385527282038] fbridge_mode=1 - [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.3571s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3348s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0217s for 8192 events => throughput is 3.78E+05 events/s + [XSECTION] Cross section = 0.2031 [0.20313504510700500] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.4505s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4278s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0223s for 8192 events => throughput is 3.68E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27343385520600988) and cpp (0.27343385527282038) differ by less than 2E-4 (2.4433877143792415e-10) +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504510700500) differ by less than 2E-4 (2.4433854939331923e-10) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -400,23 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2118 [0.21182382979024772] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 1.7596s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5252s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2338s for 90112 events => throughput is 3.85E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [XSECTION] Cross section = 0.211 [0.21095842875361914] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.1685s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9487s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2194s for 81920 events => throughput is 3.73E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21182382983293388) and cpp (0.21182382979024772) differ by less than 2E-4 (2.0151724733352694e-10) +OK! 
xsec from fortran (0.21095842877427595) and cpp (0.21095842875361914) differ by less than 2E-4 (9.791889521437724e-11) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.800197e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.788360e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.822704e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.844368e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -452,16 +452,16 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2734 [0.27343385527282038] fbridge_mode=1 - [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.3712s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3363s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0343s for 8192 events => throughput is 2.39E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [XSECTION] Cross section = 0.2031 [0.20313504510700500] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.4630s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4272s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0352s for 8192 events => throughput is 2.33E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27343385520600988) and cpp (0.27343385527282038) differ by less than 2E-4 (2.4433877143792415e-10) +OK! xsec from fortran (0.20313504505737126) and cpp (0.20313504510700500) differ by less than 2E-4 (2.4433854939331923e-10) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -480,23 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
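(Editor's aside: a minimal, hypothetical sketch of the final LHE cross-check reported as "events.lhe.cpp.10 and events.lhe.ref.10 are identical": the generated event file is compared byte for byte against the Fortran reference, colors and helicities included since they are part of the file. The actual tmad scripts may normalise the files before comparing; filecmp here is only a stand-in for that mechanism.)

import filecmp

def lhe_identical(path_new, path_ref):
    # Full-content comparison, not just stat metadata.
    return filecmp.cmp(path_new, path_ref, shallow=False)

# e.g. lhe_identical("events.lhe.cpp.10", "events.lhe.ref.10") -> True above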
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2118 [0.21182382979024772] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 1.9135s - [COUNTERS] Fortran Overhead ( 0 ) : 1.5362s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3766s for 90112 events => throughput is 2.39E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [XSECTION] Cross section = 0.211 [0.21095842875361914] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.2959s + [COUNTERS] Fortran Overhead ( 0 ) : 1.9441s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3513s for 81920 events => throughput is 2.33E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21182382983293388) and cpp (0.21182382979024772) differ by less than 2E-4 (2.0151724733352694e-10) +OK! xsec from fortran (0.21095842877427595) and cpp (0.21095842875361914) differ by less than 2E-4 (9.791889521437724e-11) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.415462e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.305023e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.367663e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.367510e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -532,16 +532,16 @@ DEBUG: MEK processed 8192 events across 5 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2734 [0.27343385529180364] fbridge_mode=1 - [UNWEIGHT] Wrote 491 events (found 1236 events) - [COUNTERS] PROGRAM TOTAL : 0.7720s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7677s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.66E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s + [XSECTION] Cross section = 0.2031 [0.20313504512110778] fbridge_mode=1 + [UNWEIGHT] Wrote 499 events (found 1502 events) + [COUNTERS] PROGRAM TOTAL : 0.8715s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8671s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.47E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.27343385520600988) and cuda (0.27343385529180364) differ by less than 2E-4 (3.1376412579220414e-10) +OK! 
xsec from fortran (0.20313504505737126) and cuda (0.20313504512110778) differ by less than 2E-4 (3.1376434783680907e-10) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -560,23 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_gqttq_x10_cudacpp > /tmp/avalassi/output_gqttq_x10_cudacpp' -DEBUG: MEK processed 90112 events across 5 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 5 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/32 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 0.2118 [0.21182382978588427] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 1.9744s - [COUNTERS] Fortran Overhead ( 0 ) : 1.9624s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0107s for 90112 events => throughput is 8.39E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s + [XSECTION] Cross section = 0.211 [0.21095842873460982] fbridge_mode=1 + [UNWEIGHT] Wrote 2259 events (found 2264 events) + [COUNTERS] PROGRAM TOTAL : 2.3800s + [COUNTERS] Fortran Overhead ( 0 ) : 2.3686s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0103s for 81920 events => throughput is 7.99E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.21182382983293388) and cuda (0.21182382978588427) differ by less than 2E-4 (2.2211676942163194e-10) +OK! xsec from fortran (0.21095842877427595) and cuda (0.21095842873460982) differ by less than 2E-4 (1.8802814860663375e-10) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -585,42 +585,42 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.042449e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.726178e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.516487e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.175370e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.381346e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.180169e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.139906e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.148445e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.358960e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.160013e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.309991e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.247186e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.360701e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.158935e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SM_GU_TTXU_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.653745e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.649343e+07 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt index d5bb978468..a0e790e59c 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 + +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-09-02_11:07:03 +DATE: 2024-09-15_16:18:04 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 3371 events (found 6399 events) - [COUNTERS] PROGRAM TOTAL : 0.9124s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8654s - [COUNTERS] Fortran MEs ( 1 ) : 0.0470s for 8192 events => throughput is 1.74E+05 events/s + [COUNTERS] PROGRAM TOTAL : 1.0034s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9555s + [COUNTERS] Fortran MEs ( 1 ) : 0.0479s for 8192 events => throughput is 1.71E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4132s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3664s - [COUNTERS] Fortran MEs ( 1 ) : 0.0468s for 8192 events => throughput is 1.75E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4837s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4346s + [COUNTERS] Fortran MEs ( 1 ) : 0.0490s for 8192 events => throughput is 1.67E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0342240372187286] fbridge_mode=0 - [UNWEIGHT] Wrote 1858 events (found 1863 events) - [COUNTERS] PROGRAM TOTAL : 1.8115s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2952s - [COUNTERS] Fortran MEs ( 1 ) : 0.5163s for 90112 events => throughput is 1.75E+05 events/s + [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=0 + [UNWEIGHT] Wrote 1707 events (found 1712 events) + [COUNTERS] PROGRAM TOTAL : 2.0766s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5991s + [COUNTERS] Fortran MEs ( 1 ) : 0.4774s for 81920 events => throughput is 1.72E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,10 +134,10 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755170] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4167s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3663s - [COUNTERS] CudaCpp MEs ( 2 
) : 0.0499s for 8192 events => throughput is 1.64E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.4969s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4447s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0517s for 8192 events => throughput is 1.58E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -160,23 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' -DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0342240372187286] fbridge_mode=1 - [UNWEIGHT] Wrote 1858 events (found 1863 events) - [COUNTERS] PROGRAM TOTAL : 1.8401s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2881s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5515s for 90112 events => throughput is 1.63E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=1 + [UNWEIGHT] Wrote 1707 events (found 1712 events) + [COUNTERS] PROGRAM TOTAL : 2.1070s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5943s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5123s for 81920 events => throughput is 1.60E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0342240372187286) and cpp (2.0342240372187286) differ by less than 3E-14 (0.0) +OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865285) differ by less than 3E-14 (0.0) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -185,12 +185,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.672440e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.652048e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.675836e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.659409e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -214,10 +214,10 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4019s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3730s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0284s for 8192 events => throughput is 2.88E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.4727s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4425s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0298s for 8192 events => throughput is 2.75E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -240,23 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' -DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0342240372187286] fbridge_mode=1 - [UNWEIGHT] Wrote 1858 events (found 1863 events) - [COUNTERS] PROGRAM TOTAL : 1.5909s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2912s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2992s for 90112 events => throughput is 3.01E+05 events/s + [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=1 + [UNWEIGHT] Wrote 1707 events (found 1712 events) + [COUNTERS] PROGRAM TOTAL : 1.8851s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6055s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2793s for 81920 events => throughput is 2.93E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0342240372187286) and cpp (2.0342240372187286) differ by less than 3E-14 (0.0) +OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865285) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -265,12 +265,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.069895e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.998650e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.024323e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.029895e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -294,9 +294,9 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755165] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.3853s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3685s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0164s for 8192 events => throughput is 5.00E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4534s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4359s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0171s for 8192 events => throughput is 4.79E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -320,23 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' -DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0342240372187521] fbridge_mode=1 - [UNWEIGHT] Wrote 1858 events (found 1863 events) - [COUNTERS] PROGRAM TOTAL : 1.4747s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2934s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1808s for 90112 events => throughput is 4.98E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 2.034 [2.0336713375865476] fbridge_mode=1 + [UNWEIGHT] Wrote 1707 events (found 1712 events) + [COUNTERS] PROGRAM TOTAL : 1.7663s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5987s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1673s for 81920 events => throughput is 4.90E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0342240372187286) and cpp (2.0342240372187521) differ by less than 3E-14 (1.1546319456101628e-14) +OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865476) differ by less than 3E-14 (9.325873406851315e-15) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -345,12 +345,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.848671e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.936700e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.970653e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.961392e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -374,10 +374,10 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755165] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.3835s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3677s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0153s for 8192 events => throughput is 5.35E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.4506s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4348s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0154s for 8192 events => throughput is 5.33E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -400,23 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' -DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0342240372187521] fbridge_mode=1 - [UNWEIGHT] Wrote 1858 events (found 1863 events) - [COUNTERS] PROGRAM TOTAL : 1.5035s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3279s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1752s for 90112 events => throughput is 5.14E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 2.034 [2.0336713375865476] fbridge_mode=1 + [UNWEIGHT] Wrote 1707 events (found 1712 events) + [COUNTERS] PROGRAM TOTAL : 1.7471s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5932s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1536s for 81920 events => throughput is 5.33E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0342240372187286) and cpp (2.0342240372187521) differ by less than 3E-14 (1.1546319456101628e-14) +OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865476) differ by less than 3E-14 (9.325873406851315e-15) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -425,12 +425,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.392257e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.479225e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.482164e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.515474e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -454,10 +454,10 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755179] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.3915s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3681s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0228s for 8192 events => throughput is 3.59E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.4603s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4365s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0234s for 8192 events => throughput is 3.49E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -480,23 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' -DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0342240372187290] fbridge_mode=1 - [UNWEIGHT] Wrote 1858 events (found 1863 events) - [COUNTERS] PROGRAM TOTAL : 1.5398s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2947s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2446s for 90112 events => throughput is 3.68E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=1 + [UNWEIGHT] Wrote 1707 events (found 1712 events) + [COUNTERS] PROGRAM TOTAL : 1.8294s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5963s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2327s for 81920 events => throughput is 3.52E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0342240372187286) and cpp (2.0342240372187290) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (2.0336713375865285) and cpp (2.0336713375865285) differ by less than 3E-14 (0.0) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -505,12 +505,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.476730e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.498096e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.588328e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.645446e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -534,9 +534,9 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755192] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.8210s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8170s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.57E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.8754s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8716s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.65E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -560,23 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' -DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0342240372187299] fbridge_mode=1 - [UNWEIGHT] Wrote 1858 events (found 1863 events) - [COUNTERS] PROGRAM TOTAL : 1.7334s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7230s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0096s for 90112 events => throughput is 9.39E+06 events/s + [XSECTION] Cross section = 2.034 [2.0336713375865294] fbridge_mode=1 + [UNWEIGHT] Wrote 1707 events (found 1712 events) + [COUNTERS] PROGRAM TOTAL : 2.0505s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0405s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0093s for 81920 events => throughput is 8.85E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0342240372187286) and cuda (2.0342240372187299) differ by less than 3E-14 (6.661338147750939e-16) +OK! xsec from fortran (2.0336713375865285) and cuda (2.0336713375865294) differ by less than 3E-14 (4.440892098500626e-16) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -585,42 +585,42 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.870351e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.883515e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.356572e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.305682e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.807956e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.805842e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.149618e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.100377e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.828479e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.801291e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.485968e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.430840e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.818735e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.808875e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.484450e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.522017e+07 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt index e697772733..e348b5e95d 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx - +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 + +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-09-02_11:07:29 +DATE: 2024-09-15_16:18:33 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 3371 events (found 6399 events) - [COUNTERS] PROGRAM TOTAL : 0.9127s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8660s - [COUNTERS] Fortran MEs ( 1 ) : 0.0467s for 8192 events => throughput is 1.76E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9786s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9307s + [COUNTERS] Fortran MEs ( 1 ) : 0.0479s for 8192 events => throughput is 1.71E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4136s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3670s - [COUNTERS] Fortran MEs ( 1 ) : 0.0466s for 8192 events => throughput is 1.76E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4748s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4272s + [COUNTERS] Fortran MEs ( 1 ) : 0.0476s for 8192 events => throughput is 1.72E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0342240372187286] fbridge_mode=0 - [UNWEIGHT] Wrote 1858 events (found 1863 events) - [COUNTERS] PROGRAM TOTAL : 1.8083s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2948s - [COUNTERS] Fortran MEs ( 1 ) : 0.5135s for 90112 events => throughput is 1.75E+05 events/s + [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=0 + [UNWEIGHT] Wrote 1707 events (found 1712 events) + [COUNTERS] PROGRAM TOTAL : 2.0898s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6127s + [COUNTERS] Fortran MEs ( 1 ) : 0.4771s for 81920 events => throughput is 1.72E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -134,10 +134,10 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160406825242951] fbridge_mode=1 [UNWEIGHT] Wrote 1653 events (found 1658 events) - [COUNTERS] PROGRAM TOTAL : 0.4175s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3702s - [COUNTERS] CudaCpp MEs ( 2 
) : 0.0469s for 8192 events => throughput is 1.75E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [COUNTERS] PROGRAM TOTAL : 0.4982s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4507s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0471s for 8192 events => throughput is 1.74E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -145,7 +145,7 @@ OK! xsec from fortran (2.0160081479755183) and cpp (2.0160406825242951) differ b *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** ERROR! events.lhe.cpp.1 and events.lhe.ref.1 differ! -diff /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.cpp.1 /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.ref.1 | head -20 +diff /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.cpp.1 /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/events.lhe.ref.1 | head -20 7562,7575d7561 < 4 1 1E-03 0.1250010E+03 0.7546771E-02 0.1235066E+00 < 21 -1 0 0 503 502 0.00000000000E+00 0.00000000000E+00 0.71320499473E+02 0.71320499473E+02 0.00000000000E+00 0. 1. diff --git a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt index 0da4f300a0..aee293444e 100644 --- a/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory 
'/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' OMP_NUM_THREADS= -DATE: 2024-09-02_11:07:35 +DATE: 2024-09-15_16:18:40 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -58,9 +58,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 3371 events (found 6399 events) - [COUNTERS] PROGRAM TOTAL : 0.9234s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8764s - [COUNTERS] Fortran MEs ( 1 ) : 0.0470s for 8192 events => throughput is 1.74E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.9822s + [COUNTERS] Fortran Overhead ( 0 ) : 0.9342s + [COUNTERS] Fortran MEs ( 1 ) : 0.0480s for 8192 events => throughput is 1.71E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -83,9 +83,9 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x1_fortran > /tmp/ [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081479755183] fbridge_mode=0 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4146s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3683s - [COUNTERS] Fortran MEs ( 1 ) : 0.0463s for 8192 events => throughput is 1.77E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4747s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4267s + [COUNTERS] Fortran MEs ( 1 ) : 0.0480s for 8192 events => throughput is 1.71E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN 
x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_heftggbb_x10_fortran > /tmp [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0342240372187286] fbridge_mode=0 - [UNWEIGHT] Wrote 1858 events (found 1863 events) - [COUNTERS] PROGRAM TOTAL : 1.8031s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2900s - [COUNTERS] Fortran MEs ( 1 ) : 0.5131s for 90112 events => throughput is 1.76E+05 events/s + [XSECTION] Cross section = 2.034 [2.0336713375865285] fbridge_mode=0 + [UNWEIGHT] Wrote 1707 events (found 1712 events) + [COUNTERS] PROGRAM TOTAL : 2.0803s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6006s + [COUNTERS] Fortran MEs ( 1 ) : 0.4796s for 81920 events => throughput is 1.71E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -135,10 +135,10 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081964453331] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4191s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3685s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0502s for 8192 events => throughput is 1.63E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.4941s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4424s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0513s for 8192 events => throughput is 1.60E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -162,23 +162,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0342240835006229] fbridge_mode=1 - [UNWEIGHT] Wrote 1858 events (found 1863 events) - [COUNTERS] PROGRAM TOTAL : 1.8344s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2853s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5487s for 90112 events => throughput is 1.64E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 2.034 [2.0336713843200420] fbridge_mode=1 + [UNWEIGHT] Wrote 1707 events (found 1712 events) + [COUNTERS] PROGRAM TOTAL : 2.0975s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5888s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5083s for 81920 events => throughput is 1.61E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0342240372187286) and cpp (2.0342240835006229) differ by less than 2E-4 (2.2751621031602554e-08) +OK! 
xsec from fortran (2.0336713375865285) and cpp (2.0336713843200420) differ by less than 2E-4 (2.2979875113904313e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -189,13 +189,13 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.553823e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.547214e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.574287e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.555047e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -220,9 +220,9 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081964453336] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.3944s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3670s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0270s for 8192 events => throughput is 3.03E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4608s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4324s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0280s for 8192 events => throughput is 2.92E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -247,23 +247,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0342240835006233] fbridge_mode=1 - [UNWEIGHT] Wrote 1858 events (found 1863 events) - [COUNTERS] PROGRAM TOTAL : 1.5971s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2953s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3013s for 90112 events => throughput is 2.99E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 2.034 [2.0336713843200425] fbridge_mode=1 + [UNWEIGHT] Wrote 1707 events (found 1712 events) + [COUNTERS] PROGRAM TOTAL : 1.8697s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5908s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2785s for 81920 events => throughput is 2.94E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0342240372187286) and cpp (2.0342240835006233) differ by less than 2E-4 (2.275162125364716e-08) +OK! 
xsec from fortran (2.0336713375865285) and cpp (2.0336713843200425) differ by less than 2E-4 (2.2979875335948918e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -274,13 +274,13 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.829435e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.808635e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.886586e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.832759e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -305,10 +305,10 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081962974745] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.3863s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3690s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0168s for 8192 events => throughput is 4.86E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [COUNTERS] PROGRAM TOTAL : 0.4521s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4343s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0175s for 8192 events => throughput is 4.69E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -332,23 +332,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0342240828564768] fbridge_mode=1 - [UNWEIGHT] Wrote 1858 events (found 1863 events) - [COUNTERS] PROGRAM TOTAL : 1.4898s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3038s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1856s for 90112 events => throughput is 4.86E+05 events/s + [XSECTION] Cross section = 2.034 [2.0336713836598665] fbridge_mode=1 + [UNWEIGHT] Wrote 1707 events (found 1712 events) + [COUNTERS] PROGRAM TOTAL : 1.7695s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5966s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1725s for 81920 events => throughput is 4.75E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0342240372187286) and cpp (2.0342240828564768) differ by less than 2E-4 (2.243496655118804e-08) +OK! 
xsec from fortran (2.0336713375865285) and cpp (2.0336713836598665) differ by less than 2E-4 (2.265525278488667e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -359,13 +359,13 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.691631e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.778867e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.845209e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.765875e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -390,9 +390,9 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081962974745] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.3957s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3798s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0154s for 8192 events => throughput is 5.33E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.4553s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4389s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0160s for 8192 events => throughput is 5.11E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -417,23 +417,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0342240828564768] fbridge_mode=1 - [UNWEIGHT] Wrote 1858 events (found 1863 events) - [COUNTERS] PROGRAM TOTAL : 1.4583s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2881s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1698s for 90112 events => throughput is 5.31E+05 events/s + [XSECTION] Cross section = 2.034 [2.0336713836598665] fbridge_mode=1 + [UNWEIGHT] Wrote 1707 events (found 1712 events) + [COUNTERS] PROGRAM TOTAL : 1.7602s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6033s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1565s for 81920 events => throughput is 5.24E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0342240372187286) and cpp (2.0342240828564768) differ by less than 2E-4 (2.243496655118804e-08) +OK! 
xsec from fortran (2.0336713375865285) and cpp (2.0336713836598665) differ by less than 2E-4 (2.265525278488667e-08) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -444,13 +444,13 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.226881e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.160486e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.096712e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.247106e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -475,10 +475,10 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081962970020] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.4060s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3808s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0247s for 8192 events => throughput is 3.32E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [COUNTERS] PROGRAM TOTAL : 0.4605s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4361s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0241s for 8192 events => throughput is 3.40E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** @@ -502,23 +502,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW -DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0342240828564875] fbridge_mode=1 - [UNWEIGHT] Wrote 1858 events (found 1863 events) - [COUNTERS] PROGRAM TOTAL : 1.5452s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2892s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2555s for 90112 events => throughput is 3.53E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 2.034 [2.0336713836598515] fbridge_mode=1 + [UNWEIGHT] Wrote 1707 events (found 1712 events) + [COUNTERS] PROGRAM TOTAL : 1.8377s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5959s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2414s for 81920 events => throughput is 3.39E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0342240372187286) and cpp (2.0342240828564875) differ by less than 2E-4 (2.243497188025856e-08) +OK! 
xsec from fortran (2.0336713375865285) and cpp (2.0336713836598515) differ by less than 2E-4 (2.2655245235370103e-08) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -529,13 +529,13 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.160841e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.114993e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.259803e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.218557e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -559,9 +559,9 @@ DEBUG: MEK processed 8192 events across 4 channels { 1 : 8192 } [XSECTION] ChannelId = 1 [XSECTION] Cross section = 2.016 [2.0160081483021330] fbridge_mode=1 [UNWEIGHT] Wrote 1652 events (found 1657 events) - [COUNTERS] PROGRAM TOTAL : 0.8155s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8115s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.63E+06 events/s + [COUNTERS] PROGRAM TOTAL : 0.8691s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8652s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.65E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** @@ -585,23 +585,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_heftggbb_x10_cudacpp > /tmp/avalassi/output_heftggbb_x10_cudacpp' -DEBUG: MEK processed 90112 events across 4 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 4 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 2.034 [2.0342240375655276] fbridge_mode=1 - [UNWEIGHT] Wrote 1858 events (found 1863 events) - [COUNTERS] PROGRAM TOTAL : 1.7654s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7547s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0099s for 90112 events => throughput is 9.10E+06 events/s + [XSECTION] Cross section = 2.034 [2.0336713380111449] fbridge_mode=1 + [UNWEIGHT] Wrote 1707 events (found 1712 events) + [COUNTERS] PROGRAM TOTAL : 2.0649s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0550s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0092s for 81920 events => throughput is 8.92E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (2.0342240372187286) and cuda (2.0342240375655276) differ by less than 2E-4 (1.7048229494776024e-10) +OK! 
xsec from fortran (2.0336713375865285) and cuda (2.0336713380111449) differ by less than 2E-4 (2.0879298290310544e-10) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -610,42 +610,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.857446e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.889005e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.233421e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.160378e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.825838e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.798951e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.145470e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.069218e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.808566e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.788436e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.481866e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.498432e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.802589e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.792905e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.508007e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.500899e+07 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt index e3380f08bb..3944248170 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx - +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' + make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' - +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-09-02_11:10:33 +DATE: 2024-09-15_16:21:57 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.668e-07 [7.6679976028208006E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 902 events) - [COUNTERS] PROGRAM TOTAL : 2.5826s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3167s - [COUNTERS] Fortran MEs ( 1 ) : 2.2659s for 8192 events => throughput is 3.62E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7837s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4036s + [COUNTERS] Fortran MEs ( 1 ) : 2.3801s for 8192 events => throughput is 3.44E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.668e-07 [7.6679976028208006E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.5887s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3201s - [COUNTERS] Fortran MEs ( 1 ) : 2.2686s for 8192 events => throughput is 3.61E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7457s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3886s + [COUNTERS] Fortran MEs ( 1 ) : 2.3571s for 8192 events => throughput is 3.48E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > / [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.616e-07 [7.6161305627123644E-007] fbridge_mode=0 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 26.7579s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8126s - [COUNTERS] Fortran MEs ( 1 ) : 24.9453s for 
90112 events => throughput is 3.61E+03 events/s + [XSECTION] Cross section = 7.654e-07 [7.6542926582898148E-007] fbridge_mode=0 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 25.3649s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1183s + [COUNTERS] Fortran MEs ( 1 ) : 23.2466s for 81920 events => throughput is 3.52E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,16 +132,16 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.668e-07 [7.6679976028207996E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.7584s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3213s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.4318s for 8192 events => throughput is 3.37E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0054s + [COUNTERS] PROGRAM TOTAL : 2.8935s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3895s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.4988s for 8192 events => throughput is 3.28E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0052s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6679976028208006E-007) and cpp (7.6679976028207996E-007) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381610362728588E-007) differ by less than 3E-14 (0.0) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,23 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.616e-07 [7.6161305627123675E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 28.6425s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8348s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.8024s for 90112 events => throughput is 3.36E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0053s + [XSECTION] Cross section = 7.654e-07 [7.6542926582898148E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 27.0952s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1181s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.9720s for 81920 events => throughput is 3.28E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0051s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6161305627123644E-007) and cpp (7.6161305627123675E-007) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898148E-007) differ by less than 3E-14 (0.0) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -185,12 +185,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.524694e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.400030e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.516868e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.433414e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -212,16 +212,16 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.668e-07 [7.6679976028208017E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728610E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 1.6021s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3244s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2748s for 8192 events => throughput is 6.43E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0028s + [COUNTERS] PROGRAM TOTAL : 1.7089s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3967s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.3094s for 8192 events => throughput is 6.26E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6679976028208006E-007) and cpp (7.6679976028208017E-007) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381610362728610E-007) differ by less than 3E-14 (2.220446049250313e-16) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -240,23 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.616e-07 [7.6161305627123633E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 16.0101s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8332s - [COUNTERS] CudaCpp MEs ( 2 ) : 14.1740s for 90112 events => throughput is 6.36E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0029s + [XSECTION] Cross section = 7.654e-07 [7.6542926582898191E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 15.2155s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1129s + [COUNTERS] CudaCpp MEs ( 2 ) : 13.0999s for 81920 events => throughput is 6.25E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (7.6161305627123644E-007) and cpp (7.6161305627123633E-007) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898191E-007) differ by less than 3E-14 (4.440892098500626e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.665998e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.519678e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.545421e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.485759e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -292,16 +292,16 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.668e-07 [7.6679976028207985E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.8914s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3263s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5634s for 8192 events => throughput is 1.45E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0017s + [COUNTERS] PROGRAM TOTAL : 0.9814s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3900s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5898s for 8192 events => throughput is 1.39E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6679976028208006E-007) and cpp (7.6679976028207985E-007) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381610362728588E-007) differ by less than 3E-14 (0.0) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -320,23 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.616e-07 [7.6161305627123675E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 8.0725s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8311s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.2398s for 90112 events => throughput is 1.44E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0017s + [XSECTION] Cross section = 7.654e-07 [7.6542926582898201E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 7.9019s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1015s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.7989s for 81920 events => throughput is 1.41E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6161305627123644E-007) and cpp (7.6161305627123675E-007) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898201E-007) differ by less than 3E-14 (6.661338147750939e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.483755e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.452031e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.480557e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.449039e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -372,16 +372,16 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.668e-07 [7.6679976028207985E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.8286s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3250s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5021s for 8192 events => throughput is 1.63E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s + [COUNTERS] PROGRAM TOTAL : 0.9097s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3947s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5136s for 8192 events => throughput is 1.60E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6679976028208006E-007) and cpp (7.6679976028207985E-007) differ by less than 3E-14 (2.220446049250313e-16) +OK! 
xsec from fortran (7.6381610362728588E-007) and cpp (7.6381610362728588E-007) differ by less than 3E-14 (0.0) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -400,23 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.616e-07 [7.6161305627123675E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 7.3795s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8232s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.5548s for 90112 events => throughput is 1.62E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s + [XSECTION] Cross section = 7.654e-07 [7.6542926582898201E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 7.2541s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0919s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.1608s for 81920 events => throughput is 1.59E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6161305627123644E-007) and cpp (7.6161305627123675E-007) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898201E-007) differ by less than 3E-14 (6.661338147750939e-16) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -425,12 +425,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.673020e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.652584e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.683149e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.662622e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -452,16 +452,16 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.668e-07 [7.6679976028207985E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.9729s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3250s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6460s for 8192 events => throughput is 1.27E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0019s + [COUNTERS] PROGRAM TOTAL : 1.0779s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3969s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6792s for 8192 events => throughput is 1.21E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6679976028208006E-007) and cpp (7.6679976028207985E-007) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381610362728588E-007) differ by less than 3E-14 (0.0) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -480,23 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.616e-07 [7.6161305627123675E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 9.0165s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8262s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.1883s for 90112 events => throughput is 1.25E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0020s + [XSECTION] Cross section = 7.654e-07 [7.6542926582898201E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 8.8889s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0984s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.7887s for 81920 events => throughput is 1.21E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0017s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (7.6161305627123644E-007) and cpp (7.6161305627123675E-007) differ by less than 3E-14 (4.440892098500626e-16) +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542926582898201E-007) differ by less than 3E-14 (6.661338147750939e-16) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.273578e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.222969e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.277348e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.228238e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -532,16 +532,16 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.668e-07 [7.6679976028207985E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728578E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.7984s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7585s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0200s for 8192 events => throughput is 4.10E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0200s + [COUNTERS] PROGRAM TOTAL : 0.8588s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8192s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0198s for 8192 events => throughput is 4.15E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0198s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6679976028208006E-007) and cuda (7.6679976028207985E-007) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.6381610362728588E-007) and cuda (7.6381610362728578E-007) differ by less than 3E-14 (1.1102230246251565e-16) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -560,23 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.616e-07 [7.6161305627123665E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 2.4531s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2399s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1934s for 90112 events => throughput is 4.66E+05 events/s + [XSECTION] Cross section = 7.654e-07 [7.6542926582898201E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 2.7854s + [COUNTERS] Fortran Overhead ( 0 ) : 2.5891s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1766s for 81920 events => throughput is 4.64E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0198s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6161305627123644E-007) and cuda (7.6161305627123665E-007) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (7.6542926582898148E-007) and cuda (7.6542926582898201E-007) differ by less than 3E-14 (6.661338147750939e-16) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.222296e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.196050e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.524950e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.528954e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.852922e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.664076e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.236218e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.233050e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.863303e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.853541e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] 
[inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.201958e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.202405e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.851908e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.857146e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.683062e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.686465e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt index 211fa0151b..e9deddff77 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx + make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone - make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-09-02_11:12:57 +DATE: 2024-09-15_16:24:19 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.668e-07 [7.6679976028208006E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 902 events) - [COUNTERS] PROGRAM TOTAL : 2.6070s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3193s - [COUNTERS] Fortran MEs ( 1 ) : 2.2878s for 8192 events => throughput is 3.58E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7015s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3761s + [COUNTERS] Fortran MEs ( 1 ) : 2.3254s for 8192 events => throughput is 3.52E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.668e-07 [7.6679976028208006E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.6125s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3260s - [COUNTERS] Fortran MEs ( 1 ) : 2.2865s for 
8192 events => throughput is 3.58E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7015s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3807s + [COUNTERS] Fortran MEs ( 1 ) : 2.3208s for 8192 events => throughput is 3.53E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > / [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.616e-07 [7.6161305627123644E-007] fbridge_mode=0 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 26.7407s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8076s - [COUNTERS] Fortran MEs ( 1 ) : 24.9330s for 90112 events => throughput is 3.61E+03 events/s + [XSECTION] Cross section = 7.654e-07 [7.6542926582898148E-007] fbridge_mode=0 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 25.2760s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1081s + [COUNTERS] Fortran MEs ( 1 ) : 23.1679s for 81920 events => throughput is 3.54E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,16 +132,16 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.668e-07 [7.6680052401606547E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381686438954397E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.7184s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3253s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.3879s for 8192 events => throughput is 3.43E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0052s + [COUNTERS] PROGRAM TOTAL : 2.8693s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3947s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.4694s for 8192 events => throughput is 3.32E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0051s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6679976028208006E-007) and cpp (7.6680052401606547E-007) differ by less than 4E-4 (9.960018572119367e-07) +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381686438954397E-007) differ by less than 4E-4 (9.960018576560259e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,23 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.616e-07 [7.6161357558617576E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 28.2547s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8365s - [COUNTERS] CudaCpp MEs ( 2 ) : 26.4131s for 90112 events => throughput is 3.41E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0051s + [XSECTION] Cross section = 7.654e-07 [7.6542978900095690E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 26.6707s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1091s + [COUNTERS] CudaCpp MEs ( 2 ) : 24.5567s for 81920 events => throughput is 3.34E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0048s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6161305627123644E-007) and cpp (7.6161357558617576E-007) differ by less than 4E-4 (6.818619180393171e-07) +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542978900095690E-007) differ by less than 4E-4 (6.835014008110818e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.508792e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.460698e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.516690e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.449222e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -212,16 +212,16 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.668e-07 [7.6680037387484579E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381671483253128E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 1.0035s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3334s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6685s for 8192 events => throughput is 1.23E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s + [COUNTERS] PROGRAM TOTAL : 1.1130s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4027s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.7087s for 8192 events => throughput is 1.16E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6679976028208006E-007) and cpp (7.6680037387484579E-007) differ by less than 4E-4 (8.001994751261066e-07) +OK! 
xsec from fortran (7.6381610362728588E-007) and cpp (7.6381671483253128E-007) differ by less than 4E-4 (8.001994753481512e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -240,23 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.616e-07 [7.6161341270162819E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 9.2668s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8432s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.4220s for 90112 events => throughput is 1.21E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0017s + [XSECTION] Cross section = 7.654e-07 [7.6542962735029303E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 9.0988s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1256s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.9716s for 81920 events => throughput is 1.18E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6161305627123644E-007) and cpp (7.6161341270162819E-007) differ by less than 4E-4 (4.679940670548888e-07) +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542962735029303E-007) differ by less than 4E-4 (4.7231184874263477e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -265,12 +265,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.238352e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.222480e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.217968e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.223984e+04 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -292,16 +292,16 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.668e-07 [7.6680038082583914E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381672175647812E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.6368s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3355s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3002s for 8192 events => throughput is 2.73E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [COUNTERS] PROGRAM TOTAL : 0.6864s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3911s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2945s for 8192 events => throughput is 2.78E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6679976028208006E-007) and cpp (7.6680038082583914E-007) differ by less than 4E-4 (8.092644145918371e-07) +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381672175647812E-007) differ by less than 4E-4 (8.092644150359263e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -320,23 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.616e-07 [7.6161368606547695E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 5.1037s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8573s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.2453s for 90112 events => throughput is 2.78E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0011s + [XSECTION] Cross section = 7.654e-07 [7.6542989697352719E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 5.0576s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0983s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.9584s for 81920 events => throughput is 2.77E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (7.6161305627123644E-007) and cpp (7.6161368606547695E-007) differ by less than 4E-4 (8.269215387990414e-07) +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542989697352719E-007) differ by less than 4E-4 (8.245628615455303e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.898486e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.831089e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.915208e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.778084e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -372,16 +372,16 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.668e-07 [7.6680038082583914E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381672175647812E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.5834s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3223s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2600s for 8192 events => throughput is 3.15E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s + [COUNTERS] PROGRAM TOTAL : 0.6632s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3942s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2682s for 8192 events => throughput is 3.05E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6679976028208006E-007) and cpp (7.6680038082583914E-007) differ by less than 4E-4 (8.092644145918371e-07) +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381672175647812E-007) differ by less than 4E-4 (8.092644150359263e-07) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -400,23 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.616e-07 [7.6161368606547695E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 4.7219s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8280s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.8930s for 90112 events => throughput is 3.11E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s + [XSECTION] Cross section = 7.654e-07 [7.6542989697352719E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 4.7861s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0881s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.6971s for 81920 events => throughput is 3.04E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6161305627123644E-007) and cpp (7.6161368606547695E-007) differ by less than 4E-4 (8.269215387990414e-07) +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542989697352719E-007) differ by less than 4E-4 (8.245628615455303e-07) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.224802e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.155559e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.245567e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.172155e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -452,16 +452,16 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.668e-07 [7.6680052283166904E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381686320975603E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.6538s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3229s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.3297s for 8192 events => throughput is 2.48E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0012s + [COUNTERS] PROGRAM TOTAL : 0.7427s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4008s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.3409s for 8192 events => throughput is 2.40E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6679976028208006E-007) and cpp (7.6680052283166904E-007) differ by less than 4E-4 (9.9445726053915e-07) +OK! 
xsec from fortran (7.6381610362728588E-007) and cpp (7.6381686320975603E-007) differ by less than 4E-4 (9.944572607611946e-07) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -480,23 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.616e-07 [7.6161383186590445E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 5.4776s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8422s - [COUNTERS] CudaCpp MEs ( 2 ) : 3.6341s for 90112 events => throughput is 2.48E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0013s + [XSECTION] Cross section = 7.654e-07 [7.6543004237976207E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 5.5358s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1001s + [COUNTERS] CudaCpp MEs ( 2 ) : 3.4347s for 81920 events => throughput is 2.39E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0010s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6161305627123644E-007) and cpp (7.6161383186590445E-007) differ by less than 4E-4 (1.0183578940115012e-06) +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6543004237976207E-007) differ by less than 4E-4 (1.014529774634454e-06) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -505,12 +505,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.465617e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.419635e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.529929e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.430312e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -532,16 +532,16 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.668e-07 [7.6680077090677233E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381711031958629E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.7991s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7619s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0198s for 8192 events => throughput is 4.14E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0174s + [COUNTERS] PROGRAM TOTAL : 0.8612s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8242s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0198s for 8192 events => throughput is 4.13E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0172s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6679976028208006E-007) and cuda (7.6680077090677233E-007) differ by less than 4E-4 (1.3179773190596933e-06) +OK! xsec from fortran (7.6381610362728588E-007) and cuda (7.6381711031958629E-007) differ by less than 4E-4 (1.3179773188376487e-06) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -560,23 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.616e-07 [7.6161404612259676E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 2.4565s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2645s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1747s for 90112 events => throughput is 5.16E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0174s + [XSECTION] Cross section = 7.654e-07 [7.6543026921346333E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 2.7207s + [COUNTERS] Fortran Overhead ( 0 ) : 2.5435s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1599s for 81920 events => throughput is 5.12E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0172s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (7.6161305627123644E-007) and cuda (7.6161404612259676E-007) differ by less than 4E-4 (1.2996775096141278e-06) +OK! xsec from fortran (7.6542926582898148E-007) and cuda (7.6543026921346333E-007) differ by less than 4E-4 (1.3108781262705094e-06) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.209228e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.203327e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.460189e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.444470e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.296236e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.300793e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.326771e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.322450e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.295726e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.296148e+06 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.322025e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.323252e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.292225e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.292944e+06 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.655822e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.656200e+05 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt index bfd919434f..2926bfb0ab 100644 --- a/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' OMP_NUM_THREADS= -DATE: 2024-09-02_11:14:56 +DATE: 2024-09-15_16:26:16 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.668e-07 [7.6679976028208006E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 [UNWEIGHT] Wrote 1 events (found 902 events) - [COUNTERS] PROGRAM TOTAL : 2.6775s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3283s - [COUNTERS] Fortran MEs ( 1 ) : 2.3491s for 8192 events => throughput is 3.49E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.7071s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3810s + [COUNTERS] Fortran MEs ( 1 ) : 2.3260s for 8192 events => throughput is 3.52E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x1_fortran > /t [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.668e-07 [7.6679976028208006E-007] fbridge_mode=0 + [XSECTION] Cross section = 7.638e-07 [7.6381610362728588E-007] fbridge_mode=0 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.6836s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3360s - [COUNTERS] Fortran MEs ( 1 ) : 2.3476s for 8192 events => throughput is 3.49E+03 events/s + [COUNTERS] PROGRAM TOTAL : 2.6931s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3811s + [COUNTERS] Fortran MEs ( 1 ) : 2.3120s for 8192 events => throughput is 3.54E+03 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_smeftggtttt_x10_fortran > / [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.616e-07 [7.6161305627123644E-007] fbridge_mode=0 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 27.6096s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8766s - [COUNTERS] Fortran MEs ( 1 ) : 25.7330s for 90112 events => throughput is 3.50E+03 events/s + [XSECTION] Cross section = 7.654e-07 [7.6542926582898148E-007] fbridge_mode=0 + 
[UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 25.2391s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1065s + [COUNTERS] Fortran MEs ( 1 ) : 23.1326s for 81920 events => throughput is 3.54E+03 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,16 +132,16 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.668e-07 [7.6679974424193742E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381608764955655E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 2.8645s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3335s - [COUNTERS] CudaCpp MEs ( 2 ) : 2.5256s for 8192 events => throughput is 3.24E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0055s + [COUNTERS] PROGRAM TOTAL : 2.9243s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3965s + [COUNTERS] CudaCpp MEs ( 2 ) : 2.5226s for 8192 events => throughput is 3.25E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0052s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6679976028208006E-007) and cpp (7.6679974424193742E-007) differ by less than 2E-4 (2.0918293763827478e-08) +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381608764955655E-007) differ by less than 2E-4 (2.0918293319738268e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,23 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.616e-07 [7.6161304067553537E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 29.0052s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8441s - [COUNTERS] CudaCpp MEs ( 2 ) : 27.1557s for 90112 events => throughput is 3.32E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0054s + [XSECTION] Cross section = 7.654e-07 [7.6542925018181681E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 27.2773s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1266s + [COUNTERS] CudaCpp MEs ( 2 ) : 25.1455s for 81920 events => throughput is 3.26E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0052s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6161305627123644E-007) and cpp (7.6161304067553537E-007) differ by less than 2E-4 (2.0477197604229502e-08) +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542925018181681E-007) differ by less than 2E-4 (2.044233915476923e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -185,12 +185,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.464819e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.405812e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.464133e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.414011e+03 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -212,16 +212,16 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.668e-07 [7.6679974345453326E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381608686521600E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 1.5765s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3234s - [COUNTERS] CudaCpp MEs ( 2 ) : 1.2502s for 8192 events => throughput is 6.55E+03 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0029s + [COUNTERS] PROGRAM TOTAL : 1.6740s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3954s + [COUNTERS] CudaCpp MEs ( 2 ) : 1.2759s for 8192 events => throughput is 6.42E+03 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6679976028208006E-007) and cpp (7.6679974345453326E-007) differ by less than 2E-4 (2.1945164352388247e-08) +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381608686521600E-007) differ by less than 2E-4 (2.1945164241365944e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -240,23 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.616e-07 [7.6161303969775166E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 15.6221s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8267s - [COUNTERS] CudaCpp MEs ( 2 ) : 13.7926s for 90112 events => throughput is 6.53E+03 events/s + [XSECTION] Cross section = 7.654e-07 [7.6542924921991264E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 14.9334s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1011s + [COUNTERS] CudaCpp MEs ( 2 ) : 12.8296s for 81920 events => throughput is 6.39E+03 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0027s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (7.6161305627123644E-007) and cpp (7.6161303969775166E-007) differ by less than 2E-4 (2.1761030311040486e-08) +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542924921991264E-007) differ by less than 2E-4 (2.1699025132271288e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.901453e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.728250e+03 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.846222e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.779541e+03 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -292,16 +292,16 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.668e-07 [7.6679974485677619E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381608826200266E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.8933s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3306s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.5610s for 8192 events => throughput is 1.46E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s + [COUNTERS] PROGRAM TOTAL : 0.9679s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3921s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5743s for 8192 events => throughput is 1.43E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6679976028208006E-007) and cpp (7.6679974485677619E-007) differ by less than 2E-4 (2.0116469379161117e-08) +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381608826200266E-007) differ by less than 2E-4 (2.0116469379161117e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -320,23 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.616e-07 [7.6161304099640839E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 7.9940s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8231s - [COUNTERS] CudaCpp MEs ( 2 ) : 6.1692s for 90112 events => throughput is 1.46E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0017s + [XSECTION] Cross section = 7.654e-07 [7.6542925056010437E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 7.8646s + [COUNTERS] Fortran Overhead ( 0 ) : 2.1024s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.7607s for 81920 events => throughput is 1.42E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6161305627123644E-007) and cpp (7.6161304099640839E-007) differ by less than 2E-4 (2.0055890503911655e-08) +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542925056010437E-007) differ by less than 2E-4 (1.994812293126813e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.504497e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.464043e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.491789e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.467432e+04 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -372,16 +372,16 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.668e-07 [7.6679974485677619E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381608826200266E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.8199s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3252s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4932s for 8192 events => throughput is 1.66E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0015s + [COUNTERS] PROGRAM TOTAL : 0.8994s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3907s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.5073s for 8192 events => throughput is 1.61E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6679976028208006E-007) and cpp (7.6679974485677619E-007) differ by less than 2E-4 (2.0116469379161117e-08) +OK! 
xsec from fortran (7.6381610362728588E-007) and cpp (7.6381608826200266E-007) differ by less than 2E-4 (2.0116469379161117e-08) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -400,23 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.616e-07 [7.6161304099640839E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 7.2217s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8152s - [COUNTERS] CudaCpp MEs ( 2 ) : 5.4050s for 90112 events => throughput is 1.67E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0016s + [XSECTION] Cross section = 7.654e-07 [7.6542925056010437E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 7.1742s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0971s + [COUNTERS] CudaCpp MEs ( 2 ) : 5.0757s for 81920 events => throughput is 1.61E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0014s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6161305627123644E-007) and cpp (7.6161304099640839E-007) differ by less than 2E-4 (2.0055890503911655e-08) +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542925056010437E-007) differ by less than 2E-4 (1.994812293126813e-08) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -425,12 +425,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.707424e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.629585e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.727329e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.667414e+04 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -452,16 +452,16 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.668e-07 [7.6679974485677619E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381608826200266E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.9929s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3219s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.6692s for 8192 events => throughput is 1.22E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0019s + [COUNTERS] PROGRAM TOTAL : 1.0767s + [COUNTERS] Fortran Overhead ( 0 ) : 0.3890s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.6858s for 8192 events => throughput is 1.19E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6679976028208006E-007) and cpp (7.6679974485677619E-007) differ by less than 2E-4 (2.0116469379161117e-08) +OK! xsec from fortran (7.6381610362728588E-007) and cpp (7.6381608826200266E-007) differ by less than 2E-4 (2.0116469379161117e-08) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -480,23 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.616e-07 [7.6161304099640839E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 9.1174s - [COUNTERS] Fortran Overhead ( 0 ) : 1.8280s - [COUNTERS] CudaCpp MEs ( 2 ) : 7.2874s for 90112 events => throughput is 1.24E+04 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0020s + [XSECTION] Cross section = 7.654e-07 [7.6542925056010437E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 8.9698s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0925s + [COUNTERS] CudaCpp MEs ( 2 ) : 6.8755s for 81920 events => throughput is 1.19E+04 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0018s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (7.6161305627123644E-007) and cpp (7.6161304099640839E-007) differ by less than 2E-4 (2.0055890503911655e-08) +OK! xsec from fortran (7.6542926582898148E-007) and cpp (7.6542925056010437E-007) differ by less than 2E-4 (1.994812293126813e-08) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.256200e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.207955e+04 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.250184e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.210184e+04 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -532,16 +532,16 @@ DEBUG: MEK processed 8192 events across 72 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.668e-07 [7.6679976038108255E-007] fbridge_mode=1 + [XSECTION] Cross section = 7.638e-07 [7.6381610372590318E-007] fbridge_mode=1 [UNWEIGHT] Wrote 230 events (found 851 events) - [COUNTERS] PROGRAM TOTAL : 0.8003s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7603s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0200s for 8192 events => throughput is 4.11E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0201s + [COUNTERS] PROGRAM TOTAL : 0.8633s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8236s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0198s for 8192 events => throughput is 4.13E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0198s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6679976028208006E-007) and cuda (7.6679976038108255E-007) differ by less than 2E-4 (1.2911116620273333e-10) +OK! xsec from fortran (7.6381610362728588E-007) and cuda (7.6381610372590318E-007) differ by less than 2E-4 (1.2911138824733825e-10) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -560,23 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_smeftggtttt_x10_cudacpp > /tmp/avalassi/output_smeftggtttt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 72 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 72 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 64/64 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 7.616e-07 [7.6161305624152697E-007] fbridge_mode=1 - [UNWEIGHT] Wrote 1833 events (found 1838 events) - [COUNTERS] PROGRAM TOTAL : 2.5121s - [COUNTERS] Fortran Overhead ( 0 ) : 2.2973s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1947s for 90112 events => throughput is 4.63E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0200s + [XSECTION] Cross section = 7.654e-07 [7.6542926581386226E-007] fbridge_mode=1 + [UNWEIGHT] Wrote 1679 events (found 1684 events) + [COUNTERS] PROGRAM TOTAL : 2.7183s + [COUNTERS] Fortran Overhead ( 0 ) : 2.5211s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1774s for 81920 events => throughput is 4.62E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0198s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (7.6161305627123644E-007) and cuda (7.6161305624152697E-007) differ by less than 2E-4 (3.9008574148624575e-11) +OK! xsec from fortran (7.6542926582898148E-007) and cuda (7.6542926581386226E-007) differ by less than 2E-4 (1.9752643964920935e-11) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.202288e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.181769e+05 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.538720e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.501330e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.827213e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.823476e+05 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.207516e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.163740e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.827263e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.836976e+05 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = 
SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.171035e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.201797e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.831697e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.682707e+05 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.676692e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.670380e+05 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index 3ef5a0426f..9ce56a2c20 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' + make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-09-02_11:09:20 +DATE: 2024-09-15_16:20:35 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3059 [0.30590697361620717] fbridge_mode=0 + [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1732 events (found 4297 events) - [COUNTERS] PROGRAM TOTAL : 0.6420s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6335s - [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.58E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7270s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7185s + [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.57E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3059 [0.30590697361620717] fbridge_mode=0 - [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3816s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3729s - [COUNTERS] Fortran MEs ( 1 ) : 0.0087s for 8192 events => throughput is 9.43E+05 events/s + [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 + 
[UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.4436s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4349s + [COUNTERS] Fortran MEs ( 1 ) : 0.0087s for 8192 events => throughput is 9.46E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30752644073268059] fbridge_mode=0 - [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.4021s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3094s - [COUNTERS] Fortran MEs ( 1 ) : 0.0927s for 90112 events => throughput is 9.72E+05 events/s + [XSECTION] Cross section = 0.3075 [0.30747310722207288] fbridge_mode=0 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 1.6937s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6091s + [COUNTERS] Fortran MEs ( 1 ) : 0.0846s for 81920 events => throughput is 9.68E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,16 +132,16 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3059 [0.30590697361620717] fbridge_mode=1 - [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3809s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3719s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0085s for 8192 events => throughput is 9.60E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=1 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.4569s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4481s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0084s for 8192 events => throughput is 9.75E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30590697361620717) and cpp (0.30590697361620717) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449452343426120) differ by less than 3E-14 (0.0) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,23 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' -DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } +DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30752644073268059] fbridge_mode=1 - [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.3773s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2846s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0923s for 90112 events => throughput is 9.77E+05 events/s + [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 1.7096s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6246s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0847s for 81920 events => throughput is 9.68E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30752644073268059) and cpp (0.30752644073268059) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.000425e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.003482e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.004154e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.010577e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -212,16 +212,16 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3059 [0.30590697361620717] fbridge_mode=1 - [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3776s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3725s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0046s for 8192 events => throughput is 1.77E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=1 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.4412s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4364s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0045s for 8192 events => throughput is 1.84E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30590697361620717) and cpp (0.30590697361620717) differ by less than 3E-14 (0.0) +OK! 
xsec from fortran (0.30449452343426120) and cpp (0.30449452343426120) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -240,23 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' -DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } +DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30752644073268059] fbridge_mode=1 - [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.3454s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2951s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0498s for 90112 events => throughput is 1.81E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 1.6661s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6195s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0463s for 81920 events => throughput is 1.77E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30752644073268059) and cpp (0.30752644073268059) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -265,12 +265,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.864873e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.895351e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.965020e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.970667e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -292,16 +292,16 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3059 [0.30590697361620717] fbridge_mode=1 - [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3790s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3757s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.88E+06 events/s + [XSECTION] Cross section = 0.3045 [0.30449452343426114] fbridge_mode=1 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.4516s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4482s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.66E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30590697361620717) and cpp (0.30590697361620717) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449452343426114) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -320,23 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' -DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } +DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30752644073268065] fbridge_mode=1 - [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.3169s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2855s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0309s for 90112 events => throughput is 2.91E+06 events/s + [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 1.6424s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6128s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0292s for 81920 events => throughput is 2.80E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30752644073268059) and cpp (0.30752644073268065) differ by less than 3E-14 (2.220446049250313e-16) +OK! 
xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.094157e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.076046e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.245967e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.350533e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -372,16 +372,16 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3059 [0.30590697361620717] fbridge_mode=1 - [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3793s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3761s + [XSECTION] Cross section = 0.3045 [0.30449452343426114] fbridge_mode=1 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.4486s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4454s [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.92E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30590697361620717) and cpp (0.30590697361620717) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449452343426114) differ by less than 3E-14 (2.220446049250313e-16) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -400,23 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' -DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } +DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30752644073268065] fbridge_mode=1 - [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.3127s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2835s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0288s for 90112 events => throughput is 3.13E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 1.6351s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6077s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0271s for 81920 events => throughput is 3.03E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30752644073268059) and cpp (0.30752644073268065) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.093537e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.308950e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.505628e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.410024e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -452,16 +452,16 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3059 [0.30590697361620717] fbridge_mode=1 - [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3786s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3747s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0033s for 8192 events => throughput is 2.45E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 0.3045 [0.30449452343426114] fbridge_mode=1 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.4531s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4495s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.57E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30590697361620717) and cpp (0.30590697361620717) differ by less than 3E-14 (0.0) +OK! 
xsec from fortran (0.30449452343426120) and cpp (0.30449452343426114) differ by less than 3E-14 (2.220446049250313e-16) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -480,23 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' -DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } +DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30752644073268065] fbridge_mode=1 - [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.3245s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2901s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0339s for 90112 events => throughput is 2.66E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 1.6424s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6109s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0311s for 81920 events => throughput is 2.64E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30752644073268059) and cpp (0.30752644073268065) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -505,12 +505,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.858695e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.879828e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.223033e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.100504e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -532,16 +532,16 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3059 [0.30590697361620711] fbridge_mode=1 - [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.8115s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8078s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.67E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [XSECTION] Cross section = 0.3045 [0.30449452343426109] fbridge_mode=1 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.8776s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8739s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.62E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30590697361620717) and cuda (0.30590697361620711) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (0.30449452343426120) and cuda (0.30449452343426109) differ by less than 3E-14 (3.3306690738754696e-16) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -560,23 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' -DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } +DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30752644073268059] fbridge_mode=1 - [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.7282s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7193s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0084s for 90112 events => throughput is 1.08E+07 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [XSECTION] Cross section = 0.3075 [0.30747310722207283] fbridge_mode=1 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 2.0680s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0594s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0081s for 81920 events => throughput is 1.02E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (0.30752644073268059) and cuda (0.30752644073268059) differ by less than 3E-14 (0.0) +OK! xsec from fortran (0.30747310722207288) and cuda (0.30747310722207283) differ by less than 3E-14 (2.220446049250313e-16) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.084025e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.097292e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.467041e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.375155e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.542591e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.411895e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.896702e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.863144e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.496531e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.479845e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.940593e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.815670e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.503147e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.473337e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.228305e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.228599e+08 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt index e45c8dd3da..dbd145c063 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-09-02_11:09:44 +DATE: 2024-09-15_16:21:03 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3059 [0.30590697361620717] fbridge_mode=0 + [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1732 events (found 4297 events) - [COUNTERS] PROGRAM TOTAL : 0.6377s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6294s - [COUNTERS] Fortran MEs ( 1 ) : 0.0083s for 8192 events => throughput is 9.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7082s + [COUNTERS] Fortran Overhead ( 0 ) : 0.6996s + [COUNTERS] Fortran MEs ( 1 ) : 0.0086s for 8192 events => throughput is 9.53E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3059 [0.30590697361620717] fbridge_mode=0 - [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3812s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3728s - [COUNTERS] Fortran MEs ( 1 ) : 0.0084s for 8192 events => throughput is 9.74E+05 events/s + [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.4406s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4322s + [COUNTERS] Fortran MEs ( 1 ) : 0.0085s for 8192 events => throughput is 9.68E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30752644073268059] fbridge_mode=0 - [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.3990s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3066s - [COUNTERS] Fortran MEs ( 1 ) : 0.0924s for 90112 events => throughput is 9.75E+05 events/s + [XSECTION] Cross section = 0.3075 [0.30747310722207288] fbridge_mode=0 + 
[UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 1.6930s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6080s + [COUNTERS] Fortran MEs ( 1 ) : 0.0850s for 81920 events => throughput is 9.64E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,16 +132,16 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3059 [0.30590691487682503] fbridge_mode=1 - [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3843s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3757s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0083s for 8192 events => throughput is 9.89E+05 events/s + [XSECTION] Cross section = 0.3045 [0.30449446496609361] fbridge_mode=1 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.4545s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4459s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0083s for 8192 events => throughput is 9.81E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30590697361620717) and cpp (0.30590691487682503) differ by less than 4E-4 (1.9201713985506075e-07) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446496609361) differ by less than 4E-4 (1.9201714018812766e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,23 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' -DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } +DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30752638362648882] fbridge_mode=1 - [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.3847s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2934s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0910s for 90112 events => throughput is 9.90E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [XSECTION] Cross section = 0.3075 [0.30747305007079218] fbridge_mode=1 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 1.6930s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6098s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0829s for 81920 events => throughput is 9.88E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30752644073268059) and cpp (0.30752638362648882) differ by less than 4E-4 (1.8569522552969175e-07) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305007079218) differ by less than 4E-4 (1.858740792393121e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -185,12 +185,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.025283e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.007261e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.024362e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.017605e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -212,16 +212,16 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3059 [0.30590691359923711] fbridge_mode=1 - [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3754s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3724s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 3.09E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [XSECTION] Cross section = 0.3045 [0.30449446369440458] fbridge_mode=1 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.4435s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4405s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 2.99E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30590697361620717) and cpp (0.30590691359923711) differ by less than 4E-4 (1.961935334193754e-07) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446369440458) differ by less than 4E-4 (1.961935339744869e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -240,23 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' -DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } +DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30752638324844145] fbridge_mode=1 - [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.3158s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2859s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0296s for 90112 events => throughput is 3.05E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [XSECTION] Cross section = 0.3075 [0.30747304961041555] fbridge_mode=1 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 1.6362s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6083s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0277s for 81920 events => throughput is 2.96E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (0.30752644073268059) and cpp (0.30752638324844145) differ by less than 4E-4 (1.8692454217816845e-07) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747304961041555) differ by less than 4E-4 (1.8737136997515336e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.269308e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.284018e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.238619e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.432203e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -292,16 +292,16 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3059 [0.30590691606590692] fbridge_mode=1 - [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3746s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3725s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0018s for 8192 events => throughput is 4.44E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [XSECTION] Cross section = 0.3045 [0.30449446614968528] fbridge_mode=1 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.4386s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4364s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0020s for 8192 events => throughput is 4.13E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30590697361620717) and cpp (0.30590691606590692) differ by less than 4E-4 (1.881300696338073e-07) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449446614968528) differ by less than 4E-4 (1.881300697448296e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -320,23 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' -DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } +DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30752638419881734] fbridge_mode=1 - [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.3054s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2851s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0199s for 90112 events => throughput is 4.52E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [XSECTION] Cross section = 0.3075 [0.30747305065199410] fbridge_mode=1 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 1.6465s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6266s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0197s for 81920 events => throughput is 4.16E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30752644073268059) and cpp (0.30752638419881734) differ by less than 4E-4 (1.8383415467670972e-07) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305065199410) differ by less than 4E-4 (1.839838263961724e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.715578e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.063003e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.352269e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.292038e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -372,16 +372,16 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3059 [0.30590691606590692] fbridge_mode=1 - [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3732s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3712s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0017s for 8192 events => throughput is 4.71E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [XSECTION] Cross section = 0.3045 [0.30449446614968528] fbridge_mode=1 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.4510s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4489s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0019s for 8192 events => throughput is 4.40E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30590697361620717) and cpp (0.30590691606590692) differ by less than 4E-4 (1.881300696338073e-07) +OK! 
xsec from fortran (0.30449452343426120) and cpp (0.30449446614968528) differ by less than 4E-4 (1.881300697448296e-07) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -400,23 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' -DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } +DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30752638419881734] fbridge_mode=1 - [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.3384s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3179s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0202s for 90112 events => throughput is 4.46E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [XSECTION] Cross section = 0.3075 [0.30747305065199410] fbridge_mode=1 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 1.6259s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6077s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0179s for 81920 events => throughput is 4.57E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30752644073268059) and cpp (0.30752638419881734) differ by less than 4E-4 (1.8383415467670972e-07) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305065199410) differ by less than 4E-4 (1.839838263961724e-07) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -425,12 +425,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.261579e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.244642e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.564280e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.614839e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -452,16 +452,16 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3059 [0.30590692025204030] fbridge_mode=1 - [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3887s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3860s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0022s for 8192 events => throughput is 3.72E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 0.3045 [0.30449447031649013] fbridge_mode=1 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.4486s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4460s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0023s for 8192 events => throughput is 3.50E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30590697361620717) and cpp (0.30590692025204030) differ by less than 4E-4 (1.744457350794093e-07) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449447031649013) differ by less than 4E-4 (1.744457354124762e-07) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -480,23 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' -DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } +DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30752638860024578] fbridge_mode=1 - [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.3283s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3048s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0231s for 90112 events => throughput is 3.90E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 0.3075 [0.30747305508949557] fbridge_mode=1 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 1.6554s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6328s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0223s for 81920 events => throughput is 3.67E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (0.30752644073268059) and cpp (0.30752638860024578) differ by less than 4E-4 (1.6952179682228063e-07) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747305508949557) differ by less than 4E-4 (1.6955166515231213e-07) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.404995e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.412735e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.960556e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.638041e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -532,16 +532,16 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3059 [0.30590692347055715] fbridge_mode=1 - [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.8105s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8069s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.73E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [XSECTION] Cross section = 0.3045 [0.30449447352014630] fbridge_mode=1 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.8784s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8748s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.68E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30590697361620717) and cuda (0.30590692347055715) differ by less than 4E-4 (1.63924507634583e-07) +OK! xsec from fortran (0.30449452343426120) and cuda (0.30449447352014630) differ by less than 4E-4 (1.639245078566276e-07) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -560,23 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' -DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } +DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30752639119626163] fbridge_mode=1 - [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.7523s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7434s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0084s for 90112 events => throughput is 1.08E+07 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [XSECTION] Cross section = 0.3075 [0.30747305761315818] fbridge_mode=1 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 2.0625s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0542s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0078s for 81920 events => throughput is 1.05E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30752644073268059) and cuda (0.30752639119626163) differ by less than 4E-4 (1.6108019473826118e-07) +OK! xsec from fortran (0.30747310722207288) and cuda (0.30747305761315818) differ by less than 4E-4 (1.6134391445099538e-07) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.178526e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.154441e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.421541e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.353273e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.232026e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.331367e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.092431e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.058204e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.371466e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.463227e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK 
-EvtsPerSec[MECalcOnly] (3a) = ( 2.089703e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.109516e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.938227e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.155108e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.621151e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.640171e+08 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt index 13ea6d6457..2e41ca2cbf 100644 --- a/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x - -make USEBUILDDIR=1 BACKEND=cuda +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone + make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' OMP_NUM_THREADS= -DATE: 2024-09-02_11:10:08 +DATE: 2024-09-15_16:21:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3059 [0.30590697361620717] fbridge_mode=0 + [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 [UNWEIGHT] Wrote 1732 events (found 4297 events) - [COUNTERS] PROGRAM TOTAL : 0.6471s - [COUNTERS] Fortran Overhead ( 0 ) : 0.6387s - [COUNTERS] Fortran MEs ( 1 ) : 0.0084s for 8192 events => throughput is 9.76E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.7195s + [COUNTERS] Fortran Overhead ( 0 ) : 0.7107s + [COUNTERS] Fortran MEs ( 1 ) : 0.0088s for 8192 events => throughput is 9.32E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x1_fortran > /tm [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3059 [0.30590697361620717] fbridge_mode=0 - [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3844s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3756s - [COUNTERS] Fortran MEs ( 1 ) : 0.0089s for 8192 events => throughput is 9.22E+05 events/s + [XSECTION] Cross section = 0.3045 [0.30449452343426120] fbridge_mode=0 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.4498s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4410s + [COUNTERS] Fortran MEs ( 1 ) : 0.0088s for 8192 events => throughput is 9.32E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** 
-------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggt1t1_x10_fortran > /t [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30752644073268059] fbridge_mode=0 - [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.3978s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3060s - [COUNTERS] Fortran MEs ( 1 ) : 0.0918s for 90112 events => throughput is 9.82E+05 events/s + [XSECTION] Cross section = 0.3075 [0.30747310722207288] fbridge_mode=0 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 1.7106s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6251s + [COUNTERS] Fortran MEs ( 1 ) : 0.0855s for 81920 events => throughput is 9.58E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,16 +132,16 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3059 [0.30590698182878584] fbridge_mode=1 - [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3797s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3710s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0083s for 8192 events => throughput is 9.91E+05 events/s + [XSECTION] Cross section = 0.3045 [0.30449453160892032] fbridge_mode=1 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.4646s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4554s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0088s for 8192 events => throughput is 9.31E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30590697361620717) and cpp (0.30590698182878584) differ by less than 2E-4 (2.6846654010981297e-08) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453160892032) differ by less than 2E-4 (2.6846654010981297e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,23 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' -DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } +DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30752644886297176] fbridge_mode=1 - [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.3704s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2791s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0909s for 90112 events => throughput is 9.91E+05 events/s + [XSECTION] Cross section = 0.3075 [0.30747311535940236] fbridge_mode=1 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 1.7066s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6200s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0862s for 81920 events => throughput is 9.50E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (0.30752644073268059) and cpp (0.30752644886297176) differ by less than 2E-4 (2.6437698030790102e-08) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311535940236) differ by less than 2E-4 (2.6465174718381945e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.886423e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.753908e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.874996e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.000196e+06 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -212,16 +212,16 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3059 [0.30590698182878584] fbridge_mode=1 - [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3769s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3719s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0046s for 8192 events => throughput is 1.79E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 0.3045 [0.30449453160892032] fbridge_mode=1 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.4546s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4497s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0046s for 8192 events => throughput is 1.77E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30590697361620717) and cpp (0.30590698182878584) differ by less than 2E-4 (2.6846654010981297e-08) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453160892032) differ by less than 2E-4 (2.6846654010981297e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -240,23 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' -DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } +DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30752644886297176] fbridge_mode=1 - [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.3326s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2834s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0488s for 90112 events => throughput is 1.85E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 0.3075 [0.30747311535940236] fbridge_mode=1 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 1.6465s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6016s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0447s for 81920 events => throughput is 1.83E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30752644073268059) and cpp (0.30752644886297176) differ by less than 2E-4 (2.6437698030790102e-08) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311535940236) differ by less than 2E-4 (2.6465174718381945e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.911267e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.842568e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.968214e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.939712e+06 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -292,16 +292,16 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3059 [0.30590698277712874] fbridge_mode=1 - [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3750s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3718s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0028s for 8192 events => throughput is 2.92E+06 events/s + [XSECTION] Cross section = 0.3045 [0.30449453255288433] fbridge_mode=1 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.4475s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4441s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.71E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30590697361620717) and cpp (0.30590698277712874) differ by less than 2E-4 (2.994675618595011e-08) +OK! 
xsec from fortran (0.30449452343426120) and cpp (0.30449453255288433) differ by less than 2E-4 (2.99467557418609e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -320,23 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' -DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } +DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30752644968184478] fbridge_mode=1 - [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.3191s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2888s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0299s for 90112 events => throughput is 3.02E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 0.3075 [0.30747311619894635] fbridge_mode=1 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 1.6359s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6071s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0284s for 81920 events => throughput is 2.88E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30752644073268059) and cpp (0.30752644968184478) differ by less than 2E-4 (2.9100470699816583e-08) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311619894635) differ by less than 2E-4 (2.9195637685219822e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -345,12 +345,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.241118e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.319543e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.286119e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.469317e+06 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -372,16 +372,16 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3059 [0.30590698277712874] fbridge_mode=1 - [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3761s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3730s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0026s for 8192 events => throughput is 3.16E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 0.3045 [0.30449453255288433] fbridge_mode=1 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.4549s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4518s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0027s for 8192 events => throughput is 2.99E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30590697361620717) and cpp (0.30590698277712874) differ by less than 2E-4 (2.994675618595011e-08) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453255288433) differ by less than 2E-4 (2.99467557418609e-08) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -400,23 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' -DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } +DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30752644968184478] fbridge_mode=1 - [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.3399s - [COUNTERS] Fortran Overhead ( 0 ) : 1.3098s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0297s for 90112 events => throughput is 3.03E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 0.3075 [0.30747311619894635] fbridge_mode=1 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 1.6340s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6069s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0268s for 81920 events => throughput is 3.06E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (0.30752644073268059) and cpp (0.30752644968184478) differ by less than 2E-4 (2.9100470699816583e-08) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311619894635) differ by less than 2E-4 (2.9195637685219822e-08) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.473724e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.335525e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.561982e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.608974e+06 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -452,16 +452,16 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3059 [0.30590698277712874] fbridge_mode=1 - [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.3785s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3749s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.67E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 0.3045 [0.30449453255288433] fbridge_mode=1 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.4504s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4468s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.52E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30590697361620717) and cpp (0.30590698277712874) differ by less than 2E-4 (2.994675618595011e-08) +OK! xsec from fortran (0.30449452343426120) and cpp (0.30449453255288433) differ by less than 2E-4 (2.99467557418609e-08) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -480,23 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' -DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } +DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30752644968184478] fbridge_mode=1 - [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.3246s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2913s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0328s for 90112 events => throughput is 2.75E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 0.3075 [0.30747311619894635] fbridge_mode=1 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 1.6561s + [COUNTERS] Fortran Overhead ( 0 ) : 1.6249s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0307s for 81920 events => throughput is 2.67E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30752644073268059) and cpp (0.30752644968184478) differ by less than 2E-4 (2.9100470699816583e-08) +OK! xsec from fortran (0.30747310722207288) and cpp (0.30747311619894635) differ by less than 2E-4 (2.9195637685219822e-08) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.874323e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.036868e+06 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.161810e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.195892e+06 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -532,16 +532,16 @@ DEBUG: MEK processed 8192 events across 6 channels { 3 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3059 [0.30590697378458576] fbridge_mode=1 - [UNWEIGHT] Wrote 1609 events (found 1614 events) - [COUNTERS] PROGRAM TOTAL : 0.8101s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8064s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.64E+06 events/s + [XSECTION] Cross section = 0.3045 [0.30449452360186230] fbridge_mode=1 + [UNWEIGHT] Wrote 1612 events (found 1617 events) + [COUNTERS] PROGRAM TOTAL : 0.8785s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8749s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.70E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30590697361620717) and cuda (0.30590697378458576) differ by less than 2E-4 (5.504241507026109e-10) +OK! 
xsec from fortran (0.30449452343426120) and cuda (0.30449452360186230) differ by less than 2E-4 (5.504239286580059e-10) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -560,23 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 2 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggt1t1_x10_cudacpp > /tmp/avalassi/output_susyggt1t1_x10_cudacpp' -DEBUG: MEK processed 90112 events across 6 channels { 3 : 90112 } +DEBUG: MEK processed 81920 events across 6 channels { 3 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 4/4 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 2 [XSECTION] ChannelId = 3 - [XSECTION] Cross section = 0.3075 [0.30752644069868873] fbridge_mode=1 - [UNWEIGHT] Wrote 1788 events (found 1793 events) - [COUNTERS] PROGRAM TOTAL : 1.7220s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7132s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0082s for 90112 events => throughput is 1.09E+07 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s + [XSECTION] Cross section = 0.3075 [0.30747310720557364] fbridge_mode=1 + [UNWEIGHT] Wrote 1631 events (found 1636 events) + [COUNTERS] PROGRAM TOTAL : 2.0629s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0544s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0080s for 81920 events => throughput is 1.02E+07 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (0.30752644073268059) and cuda (0.30752644069868873) differ by less than 2E-4 (1.1053313819786581e-10) +OK! xsec from fortran (0.30747310722207288) and cuda (0.30747310720557364) differ by less than 2E-4 (5.366074251611508e-11) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -585,42 +585,42 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.095622e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.079478e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.536574e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.384918e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.536213e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.482107e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.962484e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.823345e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.497581e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.501214e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.884190e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.930940e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.529353e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.488831e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.228927e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.231647e+08 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index ba2780b6c2..62e2554b8b 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx make USEBUILDDIR=1 BACKEND=cuda make USEBUILDDIR=1 BACKEND=cppnone -make USEBUILDDIR=1 BACKEND=cppavx2 make USEBUILDDIR=1 BACKEND=cppsse4 +make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-09-02_11:08:02 +DATE: 2024-09-15_16:19:09 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.69 [44.690956764848742] fbridge_mode=0 + [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 [UNWEIGHT] Wrote 2625 events (found 5368 events) - [COUNTERS] PROGRAM TOTAL : 0.7962s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7546s - [COUNTERS] Fortran MEs ( 1 ) : 0.0416s for 8192 events => throughput is 1.97E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8824s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8399s + [COUNTERS] Fortran MEs ( 1 ) : 0.0425s for 8192 events => throughput is 1.93E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.69 [44.690956764848742] fbridge_mode=0 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.4104s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3684s - [COUNTERS] Fortran MEs ( 1 ) : 0.0420s for 8192 events => throughput is 1.95E+05 events/s + [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.4660s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4237s + [COUNTERS] Fortran MEs ( 1 ) : 0.0423s for 8192 events => throughput is 1.93E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.41 [44.411959203413062] fbridge_mode=0 - [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.7386s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2815s - [COUNTERS] Fortran MEs ( 1 ) : 0.4571s for 90112 events => throughput is 1.97E+05 events/s + [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=0 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 1.9963s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5726s + [COUNTERS] Fortran MEs ( 1 ) : 0.4237s for 81920 events => throughput is 1.93E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,16 +132,16 
@@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.69 [44.690956764848757] fbridge_mode=1 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.4134s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3696s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0434s for 8192 events => throughput is 1.89E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 44.64 [44.641911695846964] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.4815s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4345s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0465s for 8192 events => throughput is 1.76E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.690956764848742) and cpp (44.690956764848757) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (44.641911695846957) and cpp (44.641911695846964) differ by less than 3E-14 (2.220446049250313e-16) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,23 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.41 [44.411959203413062] fbridge_mode=1 - [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.7632s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2798s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4829s for 90112 events => throughput is 1.87E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 44.47 [44.473264592444664] fbridge_mode=1 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 2.0315s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5822s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4490s for 81920 events => throughput is 1.82E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.411959203413062) and cpp (44.411959203413062) differ by less than 3E-14 (0.0) +OK! xsec from fortran (44.473264592444671) and cpp (44.473264592444664) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -185,12 +185,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.887422e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.850829e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.910992e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.847409e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -212,16 +212,16 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.69 [44.690956764848742] fbridge_mode=1 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3962s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3712s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0245s for 8192 events => throughput is 3.34E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.4690s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4433s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0254s for 8192 events => throughput is 3.23E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.690956764848742) and cpp (44.690956764848742) differ by less than 3E-14 (0.0) +OK! xsec from fortran (44.641911695846957) and cpp (44.641911695846957) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -240,23 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.41 [44.411959203413062] fbridge_mode=1 - [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.5530s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2804s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2721s for 90112 events => throughput is 3.31E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=1 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 1.8264s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5731s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2530s for 81920 events => throughput is 3.24E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.411959203413062) and cpp (44.411959203413062) differ by less than 3E-14 (0.0) +OK! 
xsec from fortran (44.473264592444671) and cpp (44.473264592444671) differ by less than 3E-14 (0.0) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.355488e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.295503e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.318491e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.383060e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -292,16 +292,16 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.69 [44.690956764848735] fbridge_mode=1 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3868s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3712s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0152s for 8192 events => throughput is 5.39E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.4583s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4424s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0156s for 8192 events => throughput is 5.25E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.690956764848742) and cpp (44.690956764848735) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (44.641911695846957) and cpp (44.641911695846950) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -320,23 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.41 [44.411959203413062] fbridge_mode=1 - [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.4438s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2739s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1694s for 90112 events => throughput is 5.32E+05 events/s + [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=1 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 1.7403s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5823s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1576s for 81920 events => throughput is 5.20E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.411959203413062) and cpp (44.411959203413062) differ by less than 3E-14 (0.0) +OK! xsec from fortran (44.473264592444671) and cpp (44.473264592444679) differ by less than 3E-14 (2.220446049250313e-16) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.232887e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.346054e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.260759e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.315504e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -372,16 +372,16 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.69 [44.690956764848735] fbridge_mode=1 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3866s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3723s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0138s for 8192 events => throughput is 5.92E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.4533s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4383s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0146s for 8192 events => throughput is 5.60E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.690956764848742) and cpp (44.690956764848735) differ by less than 3E-14 (1.1102230246251565e-16) +OK! 
xsec from fortran (44.641911695846957) and cpp (44.641911695846950) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -400,23 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.41 [44.411959203413062] fbridge_mode=1 - [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.4280s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2729s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1547s for 90112 events => throughput is 5.83E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=1 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 1.7255s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5809s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1443s for 81920 events => throughput is 5.68E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.411959203413062) and cpp (44.411959203413062) differ by less than 3E-14 (0.0) +OK! xsec from fortran (44.473264592444671) and cpp (44.473264592444679) differ by less than 3E-14 (2.220446049250313e-16) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -425,12 +425,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.790244e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.745425e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.804559e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.928976e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -452,16 +452,16 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.69 [44.690956764848735] fbridge_mode=1 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3930s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3710s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0216s for 8192 events => throughput is 3.79E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.4775s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4535s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0236s for 8192 events => throughput is 3.47E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.690956764848742) and cpp (44.690956764848735) differ by less than 3E-14 (1.1102230246251565e-16) +OK! xsec from fortran (44.641911695846957) and cpp (44.641911695846950) differ by less than 3E-14 (1.1102230246251565e-16) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -480,23 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_d_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.41 [44.411959203413062] fbridge_mode=1 - [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.5258s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2802s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2451s for 90112 events => throughput is 3.68E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=1 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 1.8170s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5877s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2290s for 81920 events => throughput is 3.58E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.411959203413062) and cpp (44.411959203413062) differ by less than 3E-14 (0.0) +OK! 
xsec from fortran (44.473264592444671) and cpp (44.473264592444671) differ by less than 3E-14 (0.0) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.615689e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.544976e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.658613e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.602571e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -532,16 +532,16 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.69 [44.690956764848728] fbridge_mode=1 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.8129s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8089s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.60E+06 events/s + [XSECTION] Cross section = 44.64 [44.641911695846950] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.8820s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8781s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.64E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.690956764848742) and cuda (44.690956764848728) differ by less than 3E-14 (3.3306690738754696e-16) +OK! xsec from fortran (44.641911695846957) and cuda (44.641911695846950) differ by less than 3E-14 (1.1102230246251565e-16) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -560,23 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.cuda_d_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.41 [44.411959203413069] fbridge_mode=1 - [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.7214s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7107s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0098s for 90112 events => throughput is 9.16E+06 events/s + [XSECTION] Cross section = 44.47 [44.473264592444679] fbridge_mode=1 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 2.0328s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0229s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0091s for 81920 events => throughput is 9.01E+06 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.411959203413062) and cuda (44.411959203413069) differ by less than 3E-14 (2.220446049250313e-16) +OK! xsec from fortran (44.473264592444671) and cuda (44.473264592444679) differ by less than 3E-14 (2.220446049250313e-16) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.991537e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.708123e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.441995e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.136288e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.877774e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.859628e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.619188e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.597476e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.871906e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.838367e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 8.013278e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] 
(3a) = ( 7.970647e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.893751e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.822341e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.689301e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.671790e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt index bb0fae6bdf..5ea9a274c7 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx make USEBUILDDIR=1 BACKEND=cuda -make USEBUILDDIR=1 BACKEND=cppnone - - +make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 + make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' + +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-09-02_11:08:28 +DATE: 2024-09-15_16:19:38 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.69 [44.690956764848742] fbridge_mode=0 + [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 [UNWEIGHT] Wrote 2625 events (found 5368 events) - [COUNTERS] PROGRAM TOTAL : 0.8084s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7664s - [COUNTERS] Fortran MEs ( 1 ) : 0.0420s for 8192 events => throughput is 1.95E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8639s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8212s + [COUNTERS] Fortran MEs ( 1 ) : 0.0426s for 8192 events => throughput is 1.92E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.69 [44.690956764848742] fbridge_mode=0 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.4065s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3644s - [COUNTERS] Fortran MEs ( 1 ) : 0.0421s for 8192 events => throughput is 1.95E+05 events/s + [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.4685s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4259s + [COUNTERS] Fortran MEs ( 1 ) : 0.0425s for 8192 events => throughput is 1.93E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' 
./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.41 [44.411959203413062] fbridge_mode=0 - [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.7405s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2818s - [COUNTERS] Fortran MEs ( 1 ) : 0.4587s for 90112 events => throughput is 1.96E+05 events/s + [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=0 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 1.9998s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5755s + [COUNTERS] Fortran MEs ( 1 ) : 0.4243s for 81920 events => throughput is 1.93E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,16 +132,16 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.69 [44.690951135742296] fbridge_mode=1 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.4120s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3701s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0414s for 8192 events => throughput is 1.98E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 44.64 [44.641906072918047] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.4862s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4438s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0421s for 8192 events => throughput is 1.94E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.690956764848742) and cpp (44.690951135742296) differ by less than 4E-4 (1.2595627518763308e-07) +OK! xsec from fortran (44.641911695846957) and cpp (44.641906072918047) differ by less than 4E-4 (1.2595627507661078e-07) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,23 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.41 [44.411953404075810] fbridge_mode=1 - [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.7277s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2737s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4536s for 90112 events => throughput is 1.99E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 44.47 [44.473258789404959] fbridge_mode=1 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 2.0026s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5791s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4232s for 81920 events => throughput is 1.94E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (44.411959203413062) and cpp (44.411953404075810) differ by less than 4E-4 (1.305805318319031e-07) +OK! xsec from fortran (44.473264592444671) and cpp (44.473258789404959) differ by less than 4E-4 (1.3048378089131063e-07) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -185,12 +185,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.018259e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.962000e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.007607e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.971836e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -212,16 +212,16 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.69 [44.690947248027847] fbridge_mode=1 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3896s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3724s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0169s for 8192 events => throughput is 4.86E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [XSECTION] Cross section = 44.64 [44.641902189470080] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.4548s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4371s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0174s for 8192 events => throughput is 4.71E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.690956764848742) and cpp (44.690947248027847) differ by less than 4E-4 (2.1294735186305758e-07) +OK! xsec from fortran (44.641911695846957) and cpp (44.641902189470080) differ by less than 4E-4 (2.1294735186305758e-07) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -240,23 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.sse4_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.41 [44.411949727730686] fbridge_mode=1 - [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.4603s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2754s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1846s for 90112 events => throughput is 4.88E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [XSECTION] Cross section = 44.47 [44.473255074265531] fbridge_mode=1 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 1.7464s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5743s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1719s for 81920 events => throughput is 4.77E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.411959203413062) and cpp (44.411949727730686) differ by less than 4E-4 (2.1335880118211747e-07) +OK! xsec from fortran (44.473264592444671) and cpp (44.473255074265531) differ by less than 4E-4 (2.1402024852346102e-07) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.698069e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.761360e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.690804e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.758225e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -292,16 +292,16 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.69 [44.690947419182343] fbridge_mode=1 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3758s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3665s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0090s for 8192 events => throughput is 9.09E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [XSECTION] Cross section = 44.64 [44.641902360436738] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.4564s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4469s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0092s for 8192 events => throughput is 8.88E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.690956764848742) and cpp (44.690947419182343) differ by less than 4E-4 (2.0911761744457635e-07) +OK! 
xsec from fortran (44.641911695846957) and cpp (44.641902360436738) differ by less than 4E-4 (2.0911761755559866e-07) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -320,23 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.avx2_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.41 [44.411949260921247] fbridge_mode=1 - [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.3662s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2675s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0984s for 90112 events => throughput is 9.16E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [XSECTION] Cross section = 44.47 [44.473254628666531] fbridge_mode=1 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 1.6633s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5720s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0911s for 81920 events => throughput is 8.99E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.411959203413062) and cpp (44.411949260921247) differ by less than 4E-4 (2.238696962253428e-07) +OK! xsec from fortran (44.473264592444671) and cpp (44.473254628666531) differ by less than 4E-4 (2.240397288799656e-07) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -345,12 +345,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.017679e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.095324e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.191081e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.260486e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -372,16 +372,16 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.69 [44.690947419182343] fbridge_mode=1 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3811s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3724s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0084s for 8192 events => throughput is 9.80E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [XSECTION] Cross section = 44.64 [44.641902360436738] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.4530s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4440s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0087s for 8192 events => throughput is 9.41E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.690956764848742) and cpp (44.690947419182343) differ by less than 4E-4 (2.0911761744457635e-07) +OK! xsec from fortran (44.641911695846957) and cpp (44.641902360436738) differ by less than 4E-4 (2.0911761755559866e-07) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -400,23 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.41 [44.411949260921247] fbridge_mode=1 - [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.3609s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2686s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0920s for 90112 events => throughput is 9.79E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s + [XSECTION] Cross section = 44.47 [44.473254628666531] fbridge_mode=1 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 1.6712s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5843s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0866s for 81920 events => throughput is 9.46E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (44.411959203413062) and cpp (44.411949260921247) differ by less than 4E-4 (2.238696962253428e-07) +OK! xsec from fortran (44.473264592444671) and cpp (44.473254628666531) differ by less than 4E-4 (2.240397288799656e-07) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -425,12 +425,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.778277e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.845166e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 9.668348e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.869743e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -452,16 +452,16 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.69 [44.690951463003671] fbridge_mode=1 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3822s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3702s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0116s for 8192 events => throughput is 7.04E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 44.64 [44.641906399820272] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.4622s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4498s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0121s for 8192 events => throughput is 6.78E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.690956764848742) and cpp (44.690951463003671) differ by less than 4E-4 (1.1863351012664225e-07) +OK! xsec from fortran (44.641911695846957) and cpp (44.641906399820272) differ by less than 4E-4 (1.1863351012664225e-07) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -480,23 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.512z_f_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.41 [44.411953494761157] fbridge_mode=1 - [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.4124s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2839s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1281s for 90112 events => throughput is 7.03E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 44.47 [44.473258854390501] fbridge_mode=1 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 1.7035s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5822s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1211s for 81920 events => throughput is 6.77E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0002s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.411959203413062) and cpp (44.411953494761157) differ by less than 4E-4 (1.2853861908190822e-07) +OK! xsec from fortran (44.473264592444671) and cpp (44.473258854390501) differ by less than 4E-4 (1.2902255375202287e-07) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.876672e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.814469e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 6.873464e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.887986e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -532,16 +532,16 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.69 [44.690956060520207] fbridge_mode=1 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.8062s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8025s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0031s for 8192 events => throughput is 2.66E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s + [XSECTION] Cross section = 44.64 [44.641910992291372] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.8659s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8622s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0030s for 8192 events => throughput is 2.69E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.690956764848742) and cuda (44.690956060520207) differ by less than 4E-4 (1.575997887748315e-08) +OK! 
xsec from fortran (44.641911695846957) and cuda (44.641910992291372) differ by less than 4E-4 (1.575997887748315e-08) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -560,23 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.cuda_f_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.41 [44.411957327369002] fbridge_mode=1 - [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.7214s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7121s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0087s for 90112 events => throughput is 1.04E+07 events/s + [XSECTION] Cross section = 44.47 [44.473262664842089] fbridge_mode=1 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 2.0160s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0074s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0080s for 81920 events => throughput is 1.02E+07 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0006s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.411959203413062) and cuda (44.411957327369002) differ by less than 4E-4 (4.22418666712332e-08) +OK! xsec from fortran (44.473264592444671) and cuda (44.473262664842089) differ by less than 4E-4 (4.334295222729878e-08) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -585,42 +585,42 @@ OK! 
events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.086629e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.873489e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.446297e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.285658e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.792358e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.913803e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.318678e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.363404e+08 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.741693e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.939202e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.332831e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.391621e+08 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 4.395625e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.636477e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.005588e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.005101e+07 ) sec^-1 *** (3-hip) WARNING! 
SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt index ff3b8bf7c7..a80fb58d5a 100644 --- a/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tmad/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt @@ -1,41 +1,41 @@ -Working directory (build): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (build): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx make USEBUILDDIR=1 BACKEND=cuda - make USEBUILDDIR=1 BACKEND=cppnone make USEBUILDDIR=1 BACKEND=cppsse4 make USEBUILDDIR=1 BACKEND=cppavx2 +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' + make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' OMP_NUM_THREADS= -DATE: 2024-09-02_11:08:54 +DATE: 2024-09-15_16:20:06 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: -Working directory (run): /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Working directory (run): /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx *** (1) EXECUTE MADEVENT_FORTRAN (create results.dat) *** -------------------- @@ -56,11 +56,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.69 [44.690956764848742] fbridge_mode=0 + [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 [UNWEIGHT] Wrote 2625 events (found 5368 events) - [COUNTERS] PROGRAM TOTAL : 0.8079s - [COUNTERS] Fortran Overhead ( 0 ) : 0.7642s - [COUNTERS] Fortran MEs ( 1 ) : 0.0437s for 8192 events => throughput is 1.87E+05 events/s + [COUNTERS] PROGRAM TOTAL : 0.8574s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8153s + [COUNTERS] Fortran MEs ( 1 ) : 0.0421s for 8192 events => throughput is 1.95E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x1 (create events.lhe) *** -------------------- @@ -81,11 +81,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x1_fortran > /tmp/ [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.69 [44.690956764848742] fbridge_mode=0 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.4123s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3693s - [COUNTERS] Fortran MEs ( 1 ) : 0.0430s for 8192 events => throughput is 1.91E+05 events/s + [XSECTION] Cross section = 44.64 [44.641911695846957] fbridge_mode=0 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.4665s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4245s + [COUNTERS] Fortran MEs ( 1 ) : 0.0421s for 8192 events => throughput is 1.95E+05 events/s *** (1) EXECUTE MADEVENT_FORTRAN x10 (create events.lhe) *** -------------------- @@ -106,11 +106,11 @@ Executing ' ./madevent_fortran < /tmp/avalassi/input_susyggtt_x10_fortran > /tmp [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.41 [44.411959203413062] fbridge_mode=0 - [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.7537s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2906s - [COUNTERS] Fortran MEs ( 1 ) : 0.4631s for 90112 events => throughput is 1.95E+05 events/s + [XSECTION] Cross section = 44.47 [44.473264592444671] fbridge_mode=0 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 2.0185s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5937s + [COUNTERS] Fortran MEs ( 1 ) : 0.4248s for 81920 events => throughput is 1.93E+05 events/s *** (2-none) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -132,16 +132,16 
@@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.69 [44.690958008771119] fbridge_mode=1 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.4202s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3749s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0449s for 8192 events => throughput is 1.83E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 44.64 [44.641912938404218] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.4844s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4381s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0458s for 8192 events => throughput is 1.79E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.690956764848742) and cpp (44.690958008771119) differ by less than 2E-4 (2.783387209603916e-08) +OK! xsec from fortran (44.641911695846957) and cpp (44.641912938404218) differ by less than 2E-4 (2.783387209603916e-08) *** (2-none) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -160,23 +160,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.none_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.41 [44.411960462697984] fbridge_mode=1 - [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.7663s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2776s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.4883s for 90112 events => throughput is 1.85E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 44.47 [44.473265850735231] fbridge_mode=1 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 2.0381s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5844s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.4534s for 81920 events => throughput is 1.81E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-none) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.411959203413062) and cpp (44.411960462697984) differ by less than 2E-4 (2.835463575046049e-08) +OK! xsec from fortran (44.473264592444671) and cpp (44.473265850735231) differ by less than 2E-4 (2.8293190679207214e-08) *** (2-none) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -185,12 +185,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.847731e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.855837e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 1.855305e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.855798e+05 ) sec^-1 *** (2-sse4) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -212,16 +212,16 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.69 [44.690958008771119] fbridge_mode=1 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3990s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3738s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0247s for 8192 events => throughput is 3.31E+05 events/s + [XSECTION] Cross section = 44.64 [44.641912938404218] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.4621s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4365s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0252s for 8192 events => throughput is 3.25E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.690956764848742) and cpp (44.690958008771119) differ by less than 2E-4 (2.783387209603916e-08) +OK! xsec from fortran (44.641911695846957) and cpp (44.641912938404218) differ by less than 2E-4 (2.783387209603916e-08) *** (2-sse4) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -240,23 +240,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.sse4_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.41 [44.411960462697984] fbridge_mode=1 - [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.5614s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2909s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2701s for 90112 events => throughput is 3.34E+05 events/s + [XSECTION] Cross section = 44.47 [44.473265850735231] fbridge_mode=1 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 1.8351s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5852s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2495s for 81920 events => throughput is 3.28E+05 events/s [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-sse4) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.411959203413062) and cpp (44.411960462697984) differ by less than 2E-4 (2.835463575046049e-08) +OK! 
xsec from fortran (44.473264592444671) and cpp (44.473265850735231) differ by less than 2E-4 (2.8293190679207214e-08) *** (2-sse4) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -265,12 +265,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.371536e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.330553e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.411767e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.338265e+05 ) sec^-1 *** (2-avx2) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -292,16 +292,16 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.69 [44.690958040780203] fbridge_mode=1 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3924s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3767s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0152s for 8192 events => throughput is 5.38E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 44.64 [44.641912970378179] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.4575s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4416s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0156s for 8192 events => throughput is 5.26E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-avx2) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.690956764848742) and cpp (44.690958040780203) differ by less than 2E-4 (2.8550103836622043e-08) +OK! xsec from fortran (44.641911695846957) and cpp (44.641912970378179) differ by less than 2E-4 (2.8550104058666648e-08) *** (2-avx2) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -320,23 +320,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.avx2_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.41 [44.411960501666542] fbridge_mode=1 - [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.4425s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2733s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1688s for 90112 events => throughput is 5.34E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s + [XSECTION] Cross section = 44.47 [44.473265889684782] fbridge_mode=1 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 1.7359s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5815s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1540s for 81920 events => throughput is 5.32E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-avx2) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.411959203413062) and cpp (44.411960501666542) differ by less than 2E-4 (2.923206943172829e-08) +OK! xsec from fortran (44.473264592444671) and cpp (44.473265889684782) differ by less than 2E-4 (2.9168987669692115e-08) *** (2-avx2) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -345,12 +345,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.286155e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.295263e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.374034e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.300898e+05 ) sec^-1 *** (2-512y) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -372,16 +372,16 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.69 [44.690958040780203] fbridge_mode=1 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3926s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3780s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0142s for 8192 events => throughput is 5.79E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 44.64 [44.641912970378179] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.4549s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4403s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0142s for 8192 events => throughput is 5.76E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.690956764848742) and cpp (44.690958040780203) differ by less than 2E-4 (2.8550103836622043e-08) +OK! 
xsec from fortran (44.641911695846957) and cpp (44.641912970378179) differ by less than 2E-4 (2.8550104058666648e-08) *** (2-512y) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -400,23 +400,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512y_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.41 [44.411960501666542] fbridge_mode=1 - [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.4350s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2799s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.1546s for 90112 events => throughput is 5.83E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 44.47 [44.473265889684782] fbridge_mode=1 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 1.7287s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5853s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.1430s for 81920 events => throughput is 5.73E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0003s *** (2-512y) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.411959203413062) and cpp (44.411960501666542) differ by less than 2E-4 (2.923206943172829e-08) +OK! xsec from fortran (44.473264592444671) and cpp (44.473265889684782) differ by less than 2E-4 (2.9168987669692115e-08) *** (2-512y) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -425,12 +425,12 @@ OK! 
events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.918164e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.880539e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 5.938830e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.012183e+05 ) sec^-1 *** (2-512z) EXECUTE MADEVENT_CPP x1 (create events.lhe) *** -------------------- @@ -452,16 +452,16 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.69 [44.690958040780203] fbridge_mode=1 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.3936s - [COUNTERS] Fortran Overhead ( 0 ) : 0.3719s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0211s for 8192 events => throughput is 3.88E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 44.64 [44.641912970378179] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.4599s + [COUNTERS] Fortran Overhead ( 0 ) : 0.4371s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0224s for 8192 events => throughput is 3.66E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.690956764848742) and cpp (44.690958040780203) differ by less than 2E-4 (2.8550103836622043e-08) +OK! xsec from fortran (44.641911695846957) and cpp (44.641912970378179) differ by less than 2E-4 (2.8550104058666648e-08) *** (2-512z) Compare MADEVENT_CPP x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -480,23 +480,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) -------------------- Executing ' ./build.512z_m_inl0_hrd0/madevent_cpp < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.41 [44.411960501666542] fbridge_mode=1 - [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.5189s - [COUNTERS] Fortran Overhead ( 0 ) : 1.2824s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.2360s for 90112 events => throughput is 3.82E+05 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0005s + [XSECTION] Cross section = 44.47 [44.473265889684782] fbridge_mode=1 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 1.8085s + [COUNTERS] Fortran Overhead ( 0 ) : 1.5870s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.2211s for 81920 events => throughput is 3.71E+05 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0004s *** (2-512z) Compare MADEVENT_CPP x10 xsec to MADEVENT_FORTRAN xsec *** -OK! 
xsec from fortran (44.411959203413062) and cpp (44.411960501666542) differ by less than 2E-4 (2.923206943172829e-08) +OK! xsec from fortran (44.473264592444671) and cpp (44.473265889684782) differ by less than 2E-4 (2.9168987669692115e-08) *** (2-512z) Compare MADEVENT_CPP x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -505,12 +505,12 @@ OK! events.lhe.cpp.10 and events.lhe.ref.10 are identical *** EXECUTE CHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.768467e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.675547e+05 ) sec^-1 *** EXECUTE CHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.824225e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.724298e+05 ) sec^-1 *** (3-cuda) EXECUTE MADEVENT_CUDA x1 (create events.lhe) *** -------------------- @@ -532,16 +532,16 @@ DEBUG: MEK processed 8192 events across 3 channels { 1 : 8192 } [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.69 [44.690956743203600] fbridge_mode=1 - [UNWEIGHT] Wrote 1616 events (found 1621 events) - [COUNTERS] PROGRAM TOTAL : 0.8164s - [COUNTERS] Fortran Overhead ( 0 ) : 0.8123s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.58E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0009s + [XSECTION] Cross section = 44.64 [44.641911674225568] fbridge_mode=1 + [UNWEIGHT] Wrote 1617 events (found 1622 events) + [COUNTERS] PROGRAM TOTAL : 0.8739s + [COUNTERS] Fortran Overhead ( 0 ) : 0.8700s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0032s for 8192 events => throughput is 2.59E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s *** (3-cuda) Compare MADEVENT_CUDA x1 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.690956764848742) and cuda (44.690956743203600) differ by less than 2E-4 (4.843293543999039e-10) +OK! xsec from fortran (44.641911695846957) and cuda (44.641911674225568) differ by less than 2E-4 (4.843293543999039e-10) *** (3-cuda) Compare MADEVENT_CUDA x1 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -560,23 +560,23 @@ CUDACPP_RUNTIME_VECSIZEUSED = 8192 1 ! ICONFIG number (1-N) for single-diagram enhancement multi-channel (NB used even if suppress amplitude is 0!) 
-------------------- Executing ' ./build.cuda_m_inl0_hrd0/madevent_cuda < /tmp/avalassi/input_susyggtt_x10_cudacpp > /tmp/avalassi/output_susyggtt_x10_cudacpp' -DEBUG: MEK processed 90112 events across 3 channels { 1 : 90112 } +DEBUG: MEK processed 81920 events across 3 channels { 1 : 81920 } [OPENMPTH] omp_get_max_threads/nproc = 1/4 [NGOODHEL] ngoodhel/ncomb = 16/16 [XSECTION] VECSIZE_USED = 8192 [XSECTION] MultiChannel = TRUE [XSECTION] Configuration = 1 [XSECTION] ChannelId = 1 - [XSECTION] Cross section = 44.41 [44.411959201897950] fbridge_mode=1 - [UNWEIGHT] Wrote 1775 events (found 1780 events) - [COUNTERS] PROGRAM TOTAL : 1.7393s - [COUNTERS] Fortran Overhead ( 0 ) : 1.7287s - [COUNTERS] CudaCpp MEs ( 2 ) : 0.0098s for 90112 events => throughput is 9.21E+06 events/s - [COUNTERS] CudaCpp HEL ( 3 ) : 0.0008s + [XSECTION] Cross section = 44.47 [44.473264587763374] fbridge_mode=1 + [UNWEIGHT] Wrote 1622 events (found 1627 events) + [COUNTERS] PROGRAM TOTAL : 2.0197s + [COUNTERS] Fortran Overhead ( 0 ) : 2.0099s + [COUNTERS] CudaCpp MEs ( 2 ) : 0.0091s for 81920 events => throughput is 9.03E+06 events/s + [COUNTERS] CudaCpp HEL ( 3 ) : 0.0007s *** (3-cuda) Compare MADEVENT_CUDA x10 xsec to MADEVENT_FORTRAN xsec *** -OK! xsec from fortran (44.411959203413062) and cuda (44.411959201897950) differ by less than 2E-4 (3.411493310068181e-11) +OK! xsec from fortran (44.473264592444671) and cuda (44.473264587763374) differ by less than 2E-4 (1.0526091109852587e-10) *** (3-cuda) Compare MADEVENT_CUDA x10 events.lhe to MADEVENT_FORTRAN events.lhe reference (including colors and helicities) *** @@ -585,42 +585,42 @@ OK! events.lhe.cuda.10 and events.lhe.ref.10 are identical *** EXECUTE GCHECK(8192) -p 256 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.004903e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.935715e+06 ) sec^-1 *** EXECUTE GCHECK(8192) -p 256 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 3.420646e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.223055e+06 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.892002e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.837385e+07 ) sec^-1 *** EXECUTE GCHECK(MAX) -p 16384 32 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 7.666542e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.518211e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.876265e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.850328e+07 ) sec^-1 *** EXECUTE GCHECK(MAX128THR) -p 4096 128 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 
8.014192e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.872009e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 --bridge *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.868624e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.860604e+07 ) sec^-1 *** EXECUTE GCHECK(MAX8THR) -p 65536 8 1 *** Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK -EvtsPerSec[MECalcOnly] (3a) = ( 2.693794e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.713800e+07 ) sec^-1 *** (3-hip) WARNING! SKIP MADEVENT_HIP (hip is not supported on this node) *** diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt index 6dabcaeadc..1292ed24b8 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-01_23:11:08 +DATE: 2024-09-15_11:08:03 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.316341e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.700853e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.825632e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.330379e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.527996e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.788543e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.678258 sec +TOTAL : 0.812638 sec INFO: No Floating Point Exceptions have been reported - 2,598,429,057 cycles # 2.870 GHz - 4,016,594,161 instructions # 1.55 insn per cycle - 0.970694128 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe 
-p 2048 256 1 + 2,711,766,628 cycles # 2.867 GHz + 4,239,903,132 instructions # 1.56 insn per cycle + 1.138564764 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 Avg ME (F77/GPU) = 1.2828039868165201E-002 Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.045966e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.218041e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.218041e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.032481e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.205909e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.205909e+06 ) sec^-1 MeanMatrixElemValue = ( 
1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 6.419615 sec
+TOTAL : 6.541164 sec
INFO: No Floating Point Exceptions have been reported
- 19,020,182,633 cycles # 2.961 GHz
- 46,088,995,833 instructions # 2.42 insn per cycle
- 6.424712247 seconds time elapsed
+ 19,214,248,144 cycles # 2.933 GHz
+ 46,179,436,349 instructions # 2.40 insn per cycle
+ 6.552095575 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039868164916E-002
Relative difference = 1.0277102699700292e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 1.554135e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.028420e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.028420e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.566602e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.052859e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.052859e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 4.470437 sec
+TOTAL : 4.470271 sec
INFO: No Floating Point Exceptions have been reported
- 12,988,824,758 cycles # 2.911 GHz
- 31,629,492,055 instructions # 2.44 insn per cycle
- 4.475693569 seconds time elapsed
+ 13,145,357,361 cycles # 2.934 GHz
+ 31,720,883,797 instructions # 2.41 insn per cycle
+ 4.481479023 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 1663) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039868164916E-002
Relative difference = 1.0277102699700292e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 1.991760e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.780070e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.780070e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.961947e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.743984e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.743984e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 3.561283 sec
+TOTAL : 3.647893 sec
INFO: No Floating Point Exceptions have been reported
- 10,090,609,279 cycles # 2.830 GHz
- 19,603,699,574 instructions # 1.94 insn per cycle
- 3.566410563 seconds time elapsed
+ 10,212,054,728 cycles # 2.792 GHz
+ 19,686,910,587 instructions # 1.93 insn per cycle
+ 3.658422867 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1912) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039868165088E-002
Relative difference = 1.0277089312025782e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 2.057332e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.885467e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.885467e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.012892e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.837375e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.837375e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 3.457613 sec
+TOTAL : 3.568887 sec
INFO: No Floating Point Exceptions have been reported
- 9,859,470,478 cycles # 2.848 GHz
- 19,269,255,487 instructions # 1.95 insn per cycle
- 3.462839764 seconds time elapsed
+ 10,042,390,879 cycles # 2.806 GHz
+ 19,342,891,969 instructions # 1.93 insn per cycle
+ 3.579550757 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1651) (512y: 178) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039868165088E-002
Relative difference = 1.0277089312025782e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 1.774988e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.357712e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.357712e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.687611e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.233598e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.233598e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 3.947870 sec
+TOTAL : 4.179012 sec
INFO: No Floating Point Exceptions have been reported
- 8,591,487,226 cycles # 2.174 GHz
- 15,733,547,778 instructions # 1.83 insn per cycle
- 3.953082251 seconds time elapsed
+ 8,766,087,418 cycles # 2.093 GHz
+ 15,826,503,490 instructions # 1.81 insn per cycle
+ 4.190350116 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 848) (512y: 156) (512z: 1257)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039868165088E-002
Relative difference = 1.0277089312025782e-08
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt
index c06c4fd582..656f6e2f98 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_bridge.txt
@@ -1,5 +1,5 @@
-Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
+Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
BACKEND=cpp512y (was cppauto)
OMPFLAGS=
FPTYPE='d'
@@ -11,40 +11,40 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h
make: Nothing to be done for 'gtestlibs'.
make USEBUILDDIR=1 BACKEND=cuda
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cppnone
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cppsse4
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cppavx2
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cpp512z
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
-DATE: 2024-09-01_23:41:41
+DATE: 2024-09-15_11:45:05
On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --bridge OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --bridge OMP=
WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -53,17 +53,17 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo
Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 4.718927e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.650908e+07 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.650908e+07 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 4.206256e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.682542e+07 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.682542e+07 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 2.222857 sec
+TOTAL : 2.457281 sec
INFO: No Floating Point Exceptions have been reported
INFO: No Floating Point Exceptions have been reported
- 7,357,681,022 cycles # 2.969 GHz
- 13,116,457,275 instructions # 1.78 insn per cycle
- 2.534653477 seconds time elapsed
-runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge
+ 7,617,652,543 cycles # 2.830 GHz
+ 12,995,599,451 instructions # 1.71 insn per cycle
+ 2.778749807 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge
WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288)
@@ -71,7 +71,7 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo
==PROF== Profiling "sigmaKin": launch__registers_per_thread 166
==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -79,35 +79,35 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2
Avg ME (C++/GPU) = 1.282804e-02
Avg ME (F77/GPU) = 1.2828039868165201E-002
Relative difference = 1.0277080522138477e-08
OK (relative difference <= 5E-3)
=========================================================================
-Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe
+Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
WARNING! Instantiate host Bridge (nevt=524288)
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME] (23) = ( 1.024615e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.188948e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.188948e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 9.578808e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.114252e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.114252e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 6.745071 sec
+TOTAL : 7.297311 sec
INFO: No Floating Point Exceptions have been reported
INFO: No Floating Point Exceptions have been reported
- 20,290,788,803 cycles # 3.006 GHz
- 46,319,861,743 instructions # 2.28 insn per cycle
- 6.751750408 seconds time elapsed
+ 20,775,260,342 cycles # 2.853 GHz
+ 46,581,102,942 instructions # 2.24 insn per cycle
+ 7.320240357 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -115,33 +115,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039868164916E-002
Relative difference = 1.0277102699700292e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
WARNING! Instantiate host Bridge (nevt=524288)
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 1.540760e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.976659e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.976659e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.428539e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.841895e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.841895e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 4.692607 sec
+TOTAL : 5.145544 sec
INFO: No Floating Point Exceptions have been reported
INFO: No Floating Point Exceptions have been reported
- 14,149,693,879 cycles # 3.012 GHz
- 32,468,658,455 instructions # 2.29 insn per cycle
- 4.699232929 seconds time elapsed
+ 14,656,531,148 cycles # 2.850 GHz
+ 32,719,868,481 instructions # 2.23 insn per cycle
+ 5.168221991 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 1663) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -149,33 +149,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039868164916E-002
Relative difference = 1.0277102699700292e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
WARNING! Instantiate host Bridge (nevt=524288)
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 1.899701e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.593761e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.593761e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.782197e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.438081e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.438081e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 3.930700 sec
+TOTAL : 4.265326 sec
INFO: No Floating Point Exceptions have been reported
INFO: No Floating Point Exceptions have been reported
- 11,314,229,464 cycles # 2.874 GHz
- 20,966,302,309 instructions # 1.85 insn per cycle
- 3.937371007 seconds time elapsed
+ 11,667,988,214 cycles # 2.728 GHz
+ 21,208,810,330 instructions # 1.82 insn per cycle
+ 4.287433901 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1912) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -183,33 +183,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039868165088E-002
Relative difference = 1.0277089312025782e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
WARNING! Instantiate host Bridge (nevt=524288)
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 1.940443e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.649272e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.649272e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.824257e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.501728e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.501728e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 3.855778 sec
+TOTAL : 4.177936 sec
INFO: No Floating Point Exceptions have been reported
INFO: No Floating Point Exceptions have been reported
- 11,163,065,611 cycles # 2.891 GHz
- 20,629,365,545 instructions # 1.85 insn per cycle
- 3.862281727 seconds time elapsed
+ 11,439,960,592 cycles # 2.728 GHz
+ 20,869,154,642 instructions # 1.82 insn per cycle
+ 4.198165961 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1651) (512y: 178) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -217,33 +217,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039868165088E-002
Relative difference = 1.0277089312025782e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
WARNING! Instantiate host Bridge (nevt=524288)
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 1.664453e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.162165e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.162165e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.555883e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.022583e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.022583e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 4.405270 sec
+TOTAL : 4.778023 sec
INFO: No Floating Point Exceptions have been reported
INFO: No Floating Point Exceptions have been reported
- 9,892,759,175 cycles # 2.243 GHz
- 16,881,792,429 instructions # 1.71 insn per cycle
- 4.412033088 seconds time elapsed
+ 10,270,194,102 cycles # 2.143 GHz
+ 17,125,695,085 instructions # 1.67 insn per cycle
+ 4.797944534 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 848) (512y: 156) (512z: 1257)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -251,8 +251,8 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039868165088E-002
Relative difference = 1.0277089312025782e-08
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt
index 7182e1a444..c883b5b3b2 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_common.txt
@@ -1,5 +1,5 @@
-Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
+Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
BACKEND=cpp512y (was cppauto)
OMPFLAGS=
FPTYPE='d'
@@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h
make: Nothing to be done for 'gtestlibs'.
make USEBUILDDIR=1 BACKEND=cuda
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cppnone
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cppsse4
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cppavx2
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cpp512z
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
-DATE: 2024-09-01_23:53:41
+DATE: 2024-09-15_11:57:26
On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --common OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --common OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 6.461368e+07 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.445287e+08 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.624818e+08 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 7.026987e+07 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.683583e+08 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.857936e+08 ) sec^-1
MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0
-TOTAL : 1.332517 sec
+TOTAL : 1.462390 sec
INFO: No Floating Point Exceptions have been reported
- 4,633,809,881 cycles # 2.948 GHz
- 7,263,887,198 instructions # 1.57 insn per cycle
- 1.628488212 seconds time elapsed
-runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common
+ 4,897,119,539 cycles # 2.892 GHz
+ 7,502,819,293 instructions # 1.53 insn per cycle
+ 1.751403177 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common
==PROF== Profiling "sigmaKin": launch__registers_per_thread 166
==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2
Avg ME (C++/GPU) = 1.282804e-02
Avg ME (F77/GPU) = 1.2828039868165201E-002
Relative difference = 1.0277080522138477e-08
OK (relative difference <= 5E-3)
=========================================================================
-Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe
+Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME] (23) = ( 1.048373e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.222413e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.222413e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.029304e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.201451e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.201451e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0
-TOTAL : 6.768902 sec
+TOTAL : 7.039422 sec
INFO: No Floating Point Exceptions have been reported
- 20,114,199,279 cycles # 2.970 GHz
- 46,191,365,075 instructions # 2.30 insn per cycle
- 6.774087692 seconds time elapsed
+ 20,621,489,655 cycles # 2.926 GHz
+ 46,653,049,885 instructions # 2.26 insn per cycle
+ 7.049549267 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039868164916E-002
Relative difference = 1.0277102699700292e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 1.604747e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.094842e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.094842e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.563131e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.046448e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.046448e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0
-TOTAL : 4.696978 sec
+TOTAL : 4.950021 sec
INFO: No Floating Point Exceptions have been reported
- 14,035,189,999 cycles # 2.985 GHz
- 31,625,158,467 instructions # 2.25 insn per cycle
- 4.702368322 seconds time elapsed
+ 14,503,935,115 cycles # 2.925 GHz
+ 32,091,166,775 instructions # 2.21 insn per cycle
+ 4.960008276 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 1663) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039868164916E-002
Relative difference = 1.0277102699700292e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 2.023804e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.826844e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.826844e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.961733e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.745410e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.745410e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0
-TOTAL : 3.873757 sec
+TOTAL : 4.125348 sec
INFO: No Floating Point Exceptions have been reported
- 11,134,547,212 cycles # 2.871 GHz
- 19,508,154,855 instructions # 1.75 insn per cycle
- 3.879105882 seconds time elapsed
+ 11,625,768,108 cycles # 2.812 GHz
+ 19,969,403,537 instructions # 1.72 insn per cycle
+ 4.135325887 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1912) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039868165088E-002
Relative difference = 1.0277089312025782e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 2.075999e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.926845e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.926845e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.013648e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.832922e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.832922e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0
-TOTAL : 3.800700 sec
+TOTAL : 4.042328 sec
INFO: No Floating Point Exceptions have been reported
- 10,969,458,533 cycles # 2.883 GHz
- 18,956,382,786 instructions # 1.73 insn per cycle
- 3.806025703 seconds time elapsed
+ 11,410,026,502 cycles # 2.817 GHz
+ 19,423,165,232 instructions # 1.70 insn per cycle
+ 4.052144921 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1651) (512y: 178) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039868165088E-002
Relative difference = 1.0277089312025782e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 1.779178e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.363282e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.363282e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.712486e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.267405e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.267405e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371879e-02 +- 3.270020e-06 ) GeV^0
-TOTAL : 4.314005 sec
+TOTAL : 4.597004 sec
INFO: No Floating Point Exceptions have been reported
- 9,739,763,283 cycles # 2.256 GHz
- 15,434,105,054 instructions # 1.58 insn per cycle
- 4.319565293 seconds time elapsed
+ 10,166,885,638 cycles # 2.209 GHz
+ 15,890,691,650 instructions # 1.56 insn per cycle
+ 4.606740948 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 848) (512y: 156) (512z: 1257)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039868165088E-002
Relative difference = 1.0277089312025782e-08
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt
index 33ead1e19d..be71099caf 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_curhst.txt
@@ -1,5 +1,5 @@
-Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
+Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
BACKEND=cpp512y (was cppauto)
OMPFLAGS=
FPTYPE='d'
@@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h
make: Nothing to be done for 'gtestlibs'.
make USEBUILDDIR=1 BACKEND=cuda
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cppnone
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cppsse4
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-01_23:50:58 +DATE: 2024-09-15_11:54:39 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.500094e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.517548e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.743547e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.198136e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.691819e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.863296e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 0.980790 sec +TOTAL : 0.975349 sec INFO: No Floating Point Exceptions have been reported - 3,560,791,476 cycles # 2.941 GHz - 7,160,309,799 instructions # 2.01 insn per cycle - 1.268112972 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst + 3,491,422,658 cycles # 2.884 GHz + 6,978,748,855 instructions # 2.00 insn per cycle + 1.269405520 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 Avg ME (F77/GPU) = 1.2828039868165201E-002 Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.057662e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.232825e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.232825e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] 
(23) = ( 1.025076e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.196021e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.196021e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.351364 sec +TOTAL : 6.587511 sec INFO: No Floating Point Exceptions have been reported - 19,028,703,027 cycles # 2.994 GHz - 46,088,094,034 instructions # 2.42 insn per cycle - 6.356654129 seconds time elapsed + 19,326,799,797 cycles # 2.930 GHz + 46,201,035,622 instructions # 2.39 insn per cycle + 6.597179522 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.610727e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.099544e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.099544e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.561064e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.042427e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.042427e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.309172 sec +TOTAL : 4.487009 sec INFO: No Floating Point Exceptions have been reported - 12,915,279,178 
cycles # 2.994 GHz - 31,622,192,060 instructions # 2.45 insn per cycle - 4.314454195 seconds time elapsed + 13,122,812,956 cycles # 2.919 GHz + 31,726,329,549 instructions # 2.42 insn per cycle + 4.497261752 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1663) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.964702e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.728204e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.728204e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.964017e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.742650e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.742650e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.611197 sec +TOTAL : 3.652673 sec INFO: No Floating Point Exceptions have been reported - 10,072,475,407 cycles # 2.786 GHz - 19,603,807,541 instructions # 1.95 insn per cycle - 3.616483385 seconds time elapsed + 10,238,914,237 cycles # 2.798 GHz + 19,709,087,829 instructions # 1.92 insn per cycle + 3.662709498 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1912) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.073817e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.916083e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.916083e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.987318e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.812393e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.812393e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.432142 sec +TOTAL : 3.613935 sec INFO: No Floating Point Exceptions have been reported - 9,841,961,259 cycles # 2.864 GHz - 19,266,632,762 instructions # 1.96 insn per cycle - 3.437636381 seconds time elapsed + 10,087,966,707 cycles # 2.786 GHz + 19,370,511,659 instructions # 1.92 insn per cycle + 3.624175302 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1651) (512y: 178) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.789510e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.380802e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.380802e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.729073e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.292942e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.292942e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.918568 sec +TOTAL : 4.086984 sec INFO: No Floating Point Exceptions have been reported - 8,610,059,548 cycles # 2.195 GHz - 15,735,305,253 instructions # 1.83 insn per cycle - 3.924031077 seconds time elapsed + 8,757,365,492 cycles # 2.138 GHz + 15,836,859,575 instructions # 1.81 insn per cycle + 4.097082998 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 848) (512y: 156) (512z: 1257) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt index bc9d647fb4..1574c6c3cf 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd0_rmbhst.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,60 +11,60 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-01_23:48:12 +DATE: 2024-09-15_11:51:49 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 12 --rmbhst OMP= WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.122021e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.451137e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.638753e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.996142e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.634832e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.796610e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 1.870611 sec +TOTAL : 1.900321 sec INFO: No Floating Point Exceptions have been reported - 6,204,814,895 cycles # 2.954 GHz - 11,480,482,305 instructions # 1.85 insn per cycle - 2.157739025 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst + 6,197,801,426 cycles # 2.910 GHz + 11,411,789,503 instructions # 1.84 insn per cycle + 2.187318710 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 166 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -72,33 +72,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282804e-02 Avg ME (F77/GPU) = 1.2828039868165201E-002 Relative difference = 1.0277080522138477e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.059054e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.233189e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.233189e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.025883e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.199963e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.199963e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 6.345554 sec +TOTAL : 6.587430 sec INFO: No Floating Point Exceptions have been reported - 19,029,442,022 cycles # 2.997 GHz - 46,088,029,717 instructions # 2.42 insn per cycle - 6.350870900 seconds time elapsed + 19,281,256,149 cycles # 2.928 GHz + 46,192,094,635 instructions # 2.40 insn per cycle + 6.597986195 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -106,31 +106,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.610948e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.102172e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.102172e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.548679e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.019540e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.019540e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 4.312066 sec +TOTAL : 4.521143 sec INFO: No Floating Point Exceptions have been reported - 12,916,579,175 cycles # 2.992 GHz - 31,622,030,303 instructions # 2.45 insn per cycle - 4.317489305 seconds time elapsed + 13,278,378,525 cycles # 2.931 GHz + 31,736,760,460 instructions # 2.39 insn per cycle + 4.531295049 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1663) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -138,31 +138,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868164916E-002 Relative difference = 1.0277102699700292e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.024660e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.820920e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.820920e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.952371e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.722642e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.722642e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.509062 sec +TOTAL : 3.672885 sec INFO: No Floating Point Exceptions have been reported - 10,061,648,485 cycles # 2.864 GHz - 19,603,502,379 instructions # 1.95 insn per cycle - 3.514567609 seconds time elapsed + 10,228,932,843 cycles # 2.778 GHz + 19,706,958,837 instructions # 1.93 insn per cycle + 3.682647007 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1912) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -170,31 +170,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.076351e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.915963e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.915963e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.003393e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.813864e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.813864e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.426777 sec +TOTAL : 3.587790 sec INFO: No Floating Point Exceptions have been reported - 9,851,026,234 cycles # 2.871 GHz - 19,268,085,761 instructions # 1.96 insn per cycle - 3.432072497 seconds time elapsed + 10,033,694,863 cycles # 2.790 GHz + 19,370,562,804 instructions # 1.93 insn per cycle + 3.597832664 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1651) (512y: 178) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -202,31 +202,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.788696e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.373983e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.373983e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.709775e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.272417e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.272417e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.919489 sec +TOTAL : 4.131241 sec INFO: No Floating Point Exceptions have been reported - 8,601,360,723 cycles # 2.192 GHz - 15,734,373,866 instructions # 1.83 insn per cycle - 3.924666234 seconds time elapsed + 8,787,275,470 cycles # 2.123 GHz + 15,836,849,319 instructions # 1.80 insn per cycle + 4.141346630 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 848) (512y: 156) (512z: 1257) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -234,8 +234,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt index d94fcbe335..3b02782d45 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cppavx2
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cpp512z
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
-DATE: 2024-09-01_23:11:39
+DATE: 2024-09-15_11:08:35
On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 7.432610e+07 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.772959e+08 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.942951e+08 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 6.564401e+07 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.700588e+08 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.875805e+08 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 0.656283 sec
+TOTAL : 0.684524 sec
INFO: No Floating Point Exceptions have been reported
- 2,600,163,483 cycles # 2.927 GHz
- 4,052,933,946 instructions # 1.56 insn per cycle
- 0.947198008 seconds time elapsed
-runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1
+ 2,669,304,279 cycles # 2.860 GHz
+ 4,081,785,887 instructions # 1.53 insn per cycle
+ 0.991787338 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1
==PROF== Profiling "sigmaKin": launch__registers_per_thread 154
==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/runTest_cuda.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/runTest_cuda.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2
Avg ME (C++/GPU) = 1.282804e-02
Avg ME (F77/GPU) = 1.2828039868165201E-002
Relative difference = 1.0277080522138477e-08
OK (relative difference <= 5E-3)
=========================================================================
-Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe
+Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl0_hrd1/check_hip.exe
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME] (23) = ( 1.040048e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.211774e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.211774e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.030249e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.204005e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.204005e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 6.455813 sec
+TOTAL : 6.555775 sec
INFO: No Floating Point Exceptions have been reported
- 19,047,161,782 cycles # 2.949 GHz
- 46,053,166,008 instructions # 2.42 insn per cycle
- 6.460830735 seconds time elapsed
+ 19,264,579,235 cycles # 2.934 GHz
+ 46,142,725,089 instructions # 2.40 insn per cycle
+ 6.567398103 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 452) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039868164916E-002
Relative difference = 1.0277102699700292e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 1.605833e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.090868e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.090868e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.561230e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.041001e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.041001e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 4.322171 sec
+TOTAL : 4.486484 sec
INFO: No Floating Point Exceptions have been reported
- 12,945,614,006 cycles # 2.992 GHz
- 31,599,414,213 instructions # 2.44 insn per cycle
- 4.327245745 seconds time elapsed
+ 13,180,735,522 cycles # 2.931 GHz
+ 31,698,753,932 instructions # 2.40 insn per cycle
+ 4.497601224 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 1649) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039868164916E-002
Relative difference = 1.0277102699700292e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 2.012855e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.814007e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.814007e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.959717e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.742175e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.742175e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 3.527895 sec
+TOTAL : 3.661759 sec
INFO: No Floating Point Exceptions have been reported
- 10,083,243,360 cycles # 2.855 GHz
- 19,584,052,787 instructions # 1.94 insn per cycle
- 3.532820504 seconds time elapsed
+ 10,296,251,645 cycles # 2.804 GHz
+ 19,686,624,933 instructions # 1.91 insn per cycle
+ 3.673145773 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1895) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039868165090E-002
Relative difference = 1.0277089176796747e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 2.040603e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.875933e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.875933e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.002735e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.826033e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.826033e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 3.488013 sec
+TOTAL : 3.593177 sec
INFO: No Floating Point Exceptions have been reported
- 9,881,550,683 cycles # 2.830 GHz
- 19,282,348,908 instructions # 1.95 insn per cycle
- 3.493036517 seconds time elapsed
+ 10,082,197,083 cycles # 2.798 GHz
+ 19,384,360,663 instructions # 1.92 insn per cycle
+ 3.604587412 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1636) (512y: 178) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039868165090E-002
Relative difference = 1.0277089176796747e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 1.829130e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.446924e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.446924e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.753760e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.344313e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.344313e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 3.841542 sec
+TOTAL : 4.042847 sec
INFO: No Floating Point Exceptions have been reported
- 8,444,886,940 cycles # 2.196 GHz
- 15,604,417,284 instructions # 1.85 insn per cycle
- 3.846602501 seconds time elapsed
+ 8,657,274,459 cycles # 2.136 GHz
+ 15,708,080,882 instructions # 1.81 insn per cycle
+ 4.054289402 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 834) (512y: 156) (512z: 1237)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039868165088E-002
Relative difference = 1.0277089312025782e-08
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt
index 94cb4ff06b..9adc226af5 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd0.txt
@@ -1,5 +1,5 @@
-Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
+Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
BACKEND=cpp512y (was cppauto)
OMPFLAGS=
FPTYPE='d'
@@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h
make: Nothing to be done for 'gtestlibs'.
make USEBUILDDIR=1 BACKEND=cuda
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cppnone
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cppsse4
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cppavx2
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cpp512z
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
-DATE: 2024-09-01_23:32:12
+DATE: 2024-09-15_11:33:55
On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0]
Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 6.929041e+07 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.617382e+08 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.790354e+08 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 6.203471e+07 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.505439e+08 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.793875e+08 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 0.673044 sec
+TOTAL : 0.692895 sec
INFO: No Floating Point Exceptions have been reported
- 2,647,663,893 cycles # 2.921 GHz
- 4,117,337,322 instructions # 1.56 insn per cycle
- 0.970576149 seconds time elapsed
-runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1
+ 2,667,104,303 cycles # 2.870 GHz
+ 4,197,568,068 instructions # 1.57 insn per cycle
+ 0.991023921 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1
==PROF== Profiling "sigmaKin": launch__registers_per_thread 166
==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/runTest_cuda.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/runTest_cuda.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2
Avg ME (C++/GPU) = 1.282804e-02
Avg ME (F77/GPU) = 1.2828039868165201E-002
Relative difference = 1.0277080522138477e-08
OK (relative difference <= 5E-3)
=========================================================================
-Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe
+Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd0/check_hip.exe
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME] (23) = ( 1.635868e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.098681e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.098681e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.609773e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.065122e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.065122e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 4.251149 sec
+TOTAL : 4.323744 sec
INFO: No Floating Point Exceptions have been reported
- 12,685,973,484 cycles # 2.981 GHz
- 32,572,050,993 instructions # 2.57 insn per cycle
- 4.256454263 seconds time elapsed
+ 12,680,899,676 cycles # 2.930 GHz
+ 32,573,373,461 instructions # 2.57 insn per cycle
+ 4.329822925 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 281) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039868164916E-002
Relative difference = 1.0277102699700292e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 2.069872e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.953267e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.953267e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.020941e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.881765e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.881765e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 3.446136 sec
+TOTAL : 3.528541 sec
INFO: No Floating Point Exceptions have been reported
- 10,327,366,247 cycles # 2.993 GHz
- 24,660,690,745 instructions # 2.39 insn per cycle
- 3.451551574 seconds time elapsed
+ 10,343,960,768 cycles # 2.928 GHz
+ 24,660,363,232 instructions # 2.38 insn per cycle
+ 3.534351751 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 1251) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039868164916E-002
Relative difference = 1.0277102699700292e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 2.268023e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.332197e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.332197e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.219408e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.252697e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.252697e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 3.179322 sec
+TOTAL : 3.248651 sec
INFO: No Floating Point Exceptions have been reported
- 9,101,042,758 cycles # 2.859 GHz
- 16,944,009,823 instructions # 1.86 insn per cycle
- 3.184712944 seconds time elapsed
+ 9,122,079,188 cycles # 2.804 GHz
+ 16,949,443,243 instructions # 1.86 insn per cycle
+ 3.254977824 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1616) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039868165088E-002
Relative difference = 1.0277089312025782e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 2.323659e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.427846e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.427846e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.281631e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.372235e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.372235e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 3.107460 sec
+TOTAL : 3.167737 sec
INFO: No Floating Point Exceptions have been reported
- 8,903,453,254 cycles # 2.861 GHz
- 16,358,754,262 instructions # 1.84 insn per cycle
- 3.112772356 seconds time elapsed
+ 8,922,630,281 cycles # 2.812 GHz
+ 16,368,012,425 instructions # 1.83 insn per cycle
+ 3.174211351 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1352) (512y: 139) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039868165088E-002
Relative difference = 1.0277089312025782e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 2.012554e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.786256e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.786256e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.953386e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.685128e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.685128e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 3.528360 sec
+TOTAL : 3.635921 sec
INFO: No Floating Point Exceptions have been reported
- 7,891,857,406 cycles # 2.234 GHz
- 14,578,880,134 instructions # 1.85 insn per cycle
- 3.533675166 seconds time elapsed
+ 7,907,839,436 cycles # 2.172 GHz
+ 14,593,864,068 instructions # 1.85 insn per cycle
+ 3.642895717 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1003) (512y: 158) (512z: 955)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039868165088E-002
Relative difference = 1.0277089312025782e-08
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt
index aeece3cb1c..a111e191c2 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_d_inl1_hrd1.txt
@@ -1,5 +1,5 @@
-Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
+Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
BACKEND=cpp512y (was cppauto)
OMPFLAGS=
FPTYPE='d'
@@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h
make: Nothing to be done for 'gtestlibs'.
make USEBUILDDIR=1 BACKEND=cuda
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cppnone
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cppsse4
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cppavx2
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cpp512z
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
-DATE: 2024-09-01_23:32:40
+DATE: 2024-09-15_11:34:21
On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1]
Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 6.944309e+07 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.640485e+08 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.841925e+08 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 6.369824e+07 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.600320e+08 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.803756e+08 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 0.670764 sec
+TOTAL : 0.688866 sec
INFO: No Floating Point Exceptions have been reported
- 2,641,145,737 cycles # 2.922 GHz
- 4,081,113,697 instructions # 1.55 insn per cycle
- 0.966027510 seconds time elapsed
-runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1
+ 2,687,883,365 cycles # 2.879 GHz
+ 4,137,672,828 instructions # 1.54 insn per cycle
+ 0.991982760 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1
==PROF== Profiling "sigmaKin": launch__registers_per_thread 154
==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/runTest_cuda.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/runTest_cuda.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2
Avg ME (C++/GPU) = 1.282804e-02
Avg ME (F77/GPU) = 1.2828039868165201E-002
Relative difference = 1.0277080522138477e-08
OK (relative difference <= 5E-3)
=========================================================================
-Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe
+Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_d_inl1_hrd1/check_hip.exe
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME] (23) = ( 2.130231e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.983916e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.983916e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.085552e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.924035e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.924035e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 3.355515 sec
+TOTAL : 3.430840 sec
INFO: No Floating Point Exceptions have been reported
- 10,000,706,305 cycles # 2.977 GHz
- 25,508,878,851 instructions # 2.55 insn per cycle
- 3.360784759 seconds time elapsed
+ 10,018,256,596 cycles # 2.916 GHz
+ 25,507,694,274 instructions # 2.55 insn per cycle
+ 3.436494229 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 236) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039868164916E-002
Relative difference = 1.0277102699700292e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 2.421455e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.716932e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.716932e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.371301e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.639987e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.639987e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 3.001722 sec
+TOTAL : 3.073954 sec
INFO: No Floating Point Exceptions have been reported
- 9,011,683,226 cycles # 2.998 GHz
- 21,468,272,813 instructions # 2.38 insn per cycle
- 3.007077981 seconds time elapsed
+ 9,025,495,783 cycles # 2.931 GHz
+ 21,478,170,721 instructions # 2.38 insn per cycle
+ 3.080490687 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 1100) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039868164916E-002
Relative difference = 1.0277102699700292e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 2.418837e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.668858e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.668858e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.348565e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.529213e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.529213e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 3.000616 sec
+TOTAL : 3.091062 sec
INFO: No Floating Point Exceptions have been reported
- 8,681,159,666 cycles # 2.889 GHz
- 15,895,078,651 instructions # 1.83 insn per cycle
- 3.006012376 seconds time elapsed
+ 8,721,037,733 cycles # 2.816 GHz
+ 15,901,191,500 instructions # 1.82 insn per cycle
+ 3.097416237 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1489) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039868165088E-002
Relative difference = 1.0277089312025782e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 2.476533e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.780002e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.780002e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.428348e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.696607e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.696607e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 2.939228 sec
+TOTAL : 3.003427 sec
INFO: No Floating Point Exceptions have been reported
- 8,461,049,520 cycles # 2.875 GHz
- 15,605,360,429 instructions # 1.84 insn per cycle
- 2.944460500 seconds time elapsed
+ 8,472,649,935 cycles # 2.816 GHz
+ 15,622,192,614 instructions # 1.84 insn per cycle
+ 3.009695803 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1264) (512y: 141) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039868165088E-002
Relative difference = 1.0277089312025782e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 2.080846e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.942278e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.942278e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.053185e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.879921e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.879921e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 3.429466 sec
+TOTAL : 3.481285 sec
INFO: No Floating Point Exceptions have been reported
- 7,594,294,635 cycles # 2.211 GHz
- 14,291,461,453 instructions # 1.88 insn per cycle
- 3.434950736 seconds time elapsed
+ 7,632,139,448 cycles # 2.189 GHz
+ 14,304,829,590 instructions # 1.87 insn per cycle
+ 3.488200715 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1032) (512y: 164) (512z: 877)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039868165088E-002 Relative difference = 1.0277089312025782e-08 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt index f57c7b21b1..bc5233a5ba 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cppavx2
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cpp512z
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
-DATE: 2024-09-01_23:12:09
+DATE: 2024-09-15_11:09:06
On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 1.440125e+08 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.939612e+08 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.936542e+08 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.192132e+08 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.336696e+08 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.300693e+08 ) sec^-1
MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0
-TOTAL : 0.569720 sec
+TOTAL : 0.585451 sec
INFO: No Floating Point Exceptions have been reported
- 2,292,199,788 cycles # 2.893 GHz
- 3,624,722,505 instructions # 1.58 insn per cycle
- 0.851143724 seconds time elapsed
-runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1
+ 2,336,870,734 cycles # 2.880 GHz
+ 3,644,936,097 instructions # 1.56 insn per cycle
+ 0.870296260 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1
==PROF== Profiling "sigmaKin": launch__registers_per_thread 109
==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2
Avg ME (C++/GPU) = 1.282802e-02
Avg ME (F77/GPU) = 1.2828112125134794E-002
Relative difference = 7.1815552823662555e-06
OK (relative difference <= 5E-3)
=========================================================================
-Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe
+Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME] (23) = ( 1.092122e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.295282e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.295282e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.072651e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.269381e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.269381e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0
-TOTAL : 6.123236 sec
+TOTAL : 6.255061 sec
INFO: No Floating Point Exceptions have been reported
- 18,282,268,739 cycles # 2.985 GHz
- 45,003,688,571 instructions # 2.46 insn per cycle
- 6.127863395 seconds time elapsed
+ 18,355,246,050 cycles # 2.931 GHz
+ 45,043,077,667 instructions # 2.45 insn per cycle
+ 6.263286658 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039854866802E-002
Relative difference = 1.1313746984080878e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 2.300657e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.500016e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.500016e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.241713e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.430745e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.430745e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0
-TOTAL : 3.101147 sec
+TOTAL : 3.200925 sec
INFO: No Floating Point Exceptions have been reported
- 9,272,391,340 cycles # 2.986 GHz
- 22,287,274,481 instructions # 2.40 insn per cycle
- 3.106124711 seconds time elapsed
+ 9,386,491,422 cycles # 2.926 GHz
+ 22,329,398,339 instructions # 2.38 insn per cycle
+ 3.208910381 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 1956) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039280066150E-002
Relative difference = 5.612189004572479e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 2.435250e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.745190e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.745190e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.404117e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.697492e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.697492e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0
-TOTAL : 2.946093 sec
+TOTAL : 3.006682 sec
INFO: No Floating Point Exceptions have been reported
- 8,415,505,503 cycles # 2.853 GHz
- 15,755,045,400 instructions # 1.87 insn per cycle
- 2.951003636 seconds time elapsed
+ 8,484,958,572 cycles # 2.815 GHz
+ 15,797,352,563 instructions # 1.86 insn per cycle
+ 3.014624816 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2564) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282805e-02
Avg ME (F77/C++) = 1.2828053255361738E-002
Relative difference = 2.5376902468575066e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 2.503188e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.850723e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.850723e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.426806e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.765840e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.765840e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0
-TOTAL : 2.877500 sec
+TOTAL : 2.982618 sec
INFO: No Floating Point Exceptions have been reported
- 8,240,649,806 cycles # 2.860 GHz
- 15,608,293,943 instructions # 1.89 insn per cycle
- 2.882240516 seconds time elapsed
+ 8,401,165,701 cycles # 2.811 GHz
+ 15,653,777,374 instructions # 1.86 insn per cycle
+ 2.990373231 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2467) (512y: 12) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282805e-02
Avg ME (F77/C++) = 1.2828053255361738E-002
Relative difference = 2.5376902468575066e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 2.533350e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.890104e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.890104e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.426463e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.722731e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.722731e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0
-TOTAL : 2.848263 sec
+TOTAL : 2.987827 sec
INFO: No Floating Point Exceptions have been reported
- 6,616,696,344 cycles # 2.320 GHz
- 12,864,238,029 instructions # 1.94 insn per cycle
- 2.853239358 seconds time elapsed
+ 6,753,744,387 cycles # 2.255 GHz
+ 12,906,211,238 instructions # 1.91 insn per cycle
+ 2.995926915 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1697) (512y: 17) (512z: 1440)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282805e-02
Avg ME (F77/C++) = 1.2828052585973637E-002
Relative difference = 2.0158743040564767e-07
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt
index 35a9e5f0a4..95b8681521 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_bridge.txt
@@ -1,5 +1,5 @@
-Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
+Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
BACKEND=cpp512y (was cppauto)
OMPFLAGS=
FPTYPE='d'
@@ -11,40 +11,40 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h
make: Nothing to be done for 'gtestlibs'.
make USEBUILDDIR=1 BACKEND=cuda
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cppnone
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cppsse4
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cppavx2
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cpp512z
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
-DATE: 2024-09-01_23:42:15
+DATE: 2024-09-15_11:45:42
On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --bridge OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --bridge OMP=
WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -53,17 +53,17 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo
Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 7.205566e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.525744e+07 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.525744e+07 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 6.076713e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.378449e+07 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.378449e+07 ) sec^-1
MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0
-TOTAL : 1.687596 sec
+TOTAL : 1.944337 sec
INFO: No Floating Point Exceptions have been reported
INFO: No Floating Point Exceptions have been reported
- 5,641,583,670 cycles # 2.942 GHz
- 10,304,772,583 instructions # 1.83 insn per cycle
- 1.974857486 seconds time elapsed
-runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge
+ 6,200,765,179 cycles # 2.831 GHz
+ 10,073,714,089 instructions # 1.62 insn per cycle
+ 2.274311561 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge
WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288)
@@ -71,7 +71,7 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo
==PROF== Profiling "sigmaKin": launch__registers_per_thread 109
==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -79,35 +79,35 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2
Avg ME (C++/GPU) = 1.282802e-02
Avg ME (F77/GPU) = 1.2828112125134794E-002
Relative difference = 7.1815552823662555e-06
OK (relative difference <= 5E-3)
=========================================================================
-Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe
+Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
WARNING! Instantiate host Bridge (nevt=524288)
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME] (23) = ( 1.078790e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.269966e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.269966e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.013177e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.196081e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.196081e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0
-TOTAL : 6.303292 sec
+TOTAL : 6.753252 sec
INFO: No Floating Point Exceptions have been reported
INFO: No Floating Point Exceptions have been reported
- 18,887,037,403 cycles # 2.994 GHz
- 45,157,662,858 instructions # 2.39 insn per cycle
- 6.309618883 seconds time elapsed
+ 19,140,260,721 cycles # 2.851 GHz
+ 45,281,984,182 instructions # 2.37 insn per cycle
+ 6.770979415 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -115,33 +115,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039854866802E-002
Relative difference = 1.1313746984080878e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
WARNING! Instantiate host Bridge (nevt=524288)
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 2.206720e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.289779e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.289779e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.075255e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.101362e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.101362e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0
-TOTAL : 3.339455 sec
+TOTAL : 3.587325 sec
INFO: No Floating Point Exceptions have been reported
INFO: No Floating Point Exceptions have been reported
- 10,009,397,596 cycles # 2.993 GHz
- 23,624,007,980 instructions # 2.36 insn per cycle
- 3.345685257 seconds time elapsed
+ 10,243,661,246 cycles # 2.856 GHz
+ 23,736,113,257 instructions # 2.32 insn per cycle
+ 3.601313820 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 1956) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -149,33 +149,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039280066150E-002
Relative difference = 5.612189004572479e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
WARNING! Instantiate host Bridge (nevt=524288)
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 2.372007e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.557809e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.557809e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.208611e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.299964e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.299964e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0
-TOTAL : 3.134578 sec
+TOTAL : 3.407535 sec
INFO: No Floating Point Exceptions have been reported
INFO: No Floating Point Exceptions have been reported
- 9,093,525,796 cycles # 2.896 GHz
- 16,876,695,371 instructions # 1.86 insn per cycle
- 3.140803970 seconds time elapsed
+ 9,325,104,909 cycles # 2.739 GHz
+ 16,992,883,294 instructions # 1.82 insn per cycle
+ 3.420829574 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2564) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -183,33 +183,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282805e-02
Avg ME (F77/C++) = 1.2828053255361738E-002
Relative difference = 2.5376902468575066e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
WARNING! Instantiate host Bridge (nevt=524288)
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 2.406670e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.628174e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.628174e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.223475e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.357947e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.357947e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0
-TOTAL : 3.100606 sec
+TOTAL : 3.391442 sec
INFO: No Floating Point Exceptions have been reported
INFO: No Floating Point Exceptions have been reported
- 8,981,201,315 cycles # 2.892 GHz
- 16,731,017,995 instructions # 1.86 insn per cycle
- 3.106832833 seconds time elapsed
+ 9,279,675,392 cycles # 2.737 GHz
+ 16,862,711,706 instructions # 1.82 insn per cycle
+ 3.405369019 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2467) (512y: 12) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -217,33 +217,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282805e-02
Avg ME (F77/C++) = 1.2828053255361738E-002
Relative difference = 2.5376902468575066e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --bridge OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
WARNING! Instantiate host Bridge (nevt=524288)
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 2.401752e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.585254e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.585254e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.268012e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.359028e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.359028e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0
-TOTAL : 3.116972 sec
+TOTAL : 3.328677 sec
INFO: No Floating Point Exceptions have been reported
INFO: No Floating Point Exceptions have been reported
- 7,400,362,239 cycles # 2.371 GHz
- 14,073,208,836 instructions # 1.90 insn per cycle
- 3.123177526 seconds time elapsed
+ 7,543,875,114 cycles # 2.260 GHz
+ 14,180,005,728 instructions # 1.88 insn per cycle
+ 3.340981281 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1697) (512y: 17) (512z: 1440)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -251,8 +251,8 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282805e-02
Avg ME (F77/C++) = 1.2828052585973637E-002
Relative difference = 2.0158743040564767e-07
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt
index a657a65df0..15fa7d3112 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_common.txt
@@ -1,5 +1,5 @@
-Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
+Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
BACKEND=cpp512y (was cppauto)
OMPFLAGS=
FPTYPE='d'
@@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h
make: Nothing to be done for 'gtestlibs'.
make USEBUILDDIR=1 BACKEND=cuda
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cppnone
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cppsse4
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cppavx2
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cpp512z
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
-DATE: 2024-09-01_23:54:14
+DATE: 2024-09-15_11:58:00
On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --common OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --common OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 1.247248e+08 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.173509e+08 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.063213e+08 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.336762e+08 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.510278e+08 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.479828e+08 ) sec^-1
MeanMatrixElemValue = ( 1.371863e-02 +- 3.269951e-06 ) GeV^0
-TOTAL : 1.182559 sec
+TOTAL : 1.311031 sec
INFO: No Floating Point Exceptions have been reported
- 4,144,228,991 cycles # 2.949 GHz
- 6,631,660,348 instructions # 1.60 insn per cycle
- 1.462664969 seconds time elapsed
-runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common
+ 4,430,467,531 cycles # 2.889 GHz
+ 6,960,795,222 instructions # 1.57 insn per cycle
+ 1.590735457 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common
==PROF== Profiling "sigmaKin": launch__registers_per_thread 109
==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 Avg ME (F77/GPU) = 1.2828112125134794E-002 Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.086369e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.285646e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.285646e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.075223e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.270769e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.270769e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270267e-06 ) GeV^0 -TOTAL : 6.497519 sec +TOTAL : 6.687798 sec INFO: No Floating Point Exceptions have been reported - 19,270,516,007 cycles # 2.964 GHz - 45,188,255,066 instructions # 2.34 insn per cycle - 6.502716523 seconds time elapsed + 19,629,378,634 cycles # 2.933 GHz + 45,588,143,016 instructions # 2.32 insn per cycle + 6.694819566 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039854866802E-002 Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.305512e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.522941e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.522941e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.248280e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.433718e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.433718e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371887e-02 +- 3.270266e-06 ) GeV^0 -TOTAL : 3.424680 sec +TOTAL : 3.634348 sec INFO: No Floating Point Exceptions have been reported - 10,285,963,197 cycles # 3.000 GHz - 22,368,079,047 instructions # 2.17 insn per cycle - 3.429925704 seconds time elapsed + 10,674,006,930 cycles # 2.932 GHz + 22,771,305,471 instructions # 2.13 insn per cycle + 3.641620548 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1956) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.479341e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.792000e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.792000e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.377112e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.647798e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.647798e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.229115 sec +TOTAL : 3.480978 sec INFO: No Floating Point Exceptions have been reported - 9,380,692,116 cycles # 2.902 GHz - 15,667,536,193 instructions # 1.67 insn per cycle - 3.234097773 seconds time elapsed + 9,770,888,729 cycles # 2.802 GHz + 16,055,948,307 instructions # 1.64 insn per cycle + 3.487814115 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2564) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.511800e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.864478e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.864478e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.421430e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.770475e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.770475e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.204591 sec +TOTAL : 3.434581 sec INFO: No Floating Point Exceptions have been reported - 9,285,052,446 cycles # 2.894 GHz - 15,324,225,187 instructions # 1.65 insn per cycle - 3.209705429 seconds time elapsed + 9,740,474,003 cycles # 2.831 GHz + 15,722,386,015 instructions # 1.61 insn per cycle + 3.441655213 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2467) (512y: 12) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.530155e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.885798e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.885798e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.441505e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.731463e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.731463e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371885e-02 +- 3.270112e-06 ) GeV^0 -TOTAL : 3.192143 sec +TOTAL : 3.419856 sec INFO: No Floating Point Exceptions have been reported - 7,665,193,061 cycles # 2.398 GHz - 12,573,697,492 instructions # 1.64 insn per cycle - 3.197431615 seconds time elapsed + 8,031,724,309 cycles # 2.344 GHz + 12,960,768,751 instructions # 1.61 insn per cycle + 3.427508239 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1697) (512y: 17) (512z: 1440) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052585973637E-002 Relative difference = 2.0158743040564767e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt index 8c6a0f3af4..cd1edf8b07 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_curhst.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-01_23:51:28 +DATE: 2024-09-15_11:55:10 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.262526e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.390328e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.387630e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.341129e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.656597e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.613096e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.856218 sec +TOTAL : 0.860917 sec INFO: No Floating Point Exceptions have been reported - 3,161,544,131 cycles # 2.937 GHz - 6,517,835,009 instructions # 2.06 insn per cycle - 1.134957109 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst + 3,166,806,232 cycles # 2.886 GHz + 6,390,531,049 instructions # 2.02 insn per cycle + 1.154020491 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 Avg ME (F77/GPU) = 1.2828112125134794E-002 Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.096959e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.295586e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.295586e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) 
= ( 1.067801e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.267766e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.267766e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.095656 sec +TOTAL : 6.288953 sec INFO: No Floating Point Exceptions have been reported - 18,241,874,835 cycles # 2.990 GHz - 45,006,676,387 instructions # 2.47 insn per cycle - 6.100863007 seconds time elapsed + 18,410,332,076 cycles # 2.925 GHz + 45,054,816,993 instructions # 2.45 insn per cycle + 6.296062030 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039854866802E-002 Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.302660e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.506502e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.506502e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.250758e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.437877e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.437877e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.102726 sec +TOTAL : 3.189367 sec INFO: No Floating Point Exceptions have been reported - 9,285,265,734 cycles # 
2.989 GHz - 22,287,225,294 instructions # 2.40 insn per cycle - 3.107827990 seconds time elapsed + 9,348,605,615 cycles # 2.926 GHz + 22,330,060,575 instructions # 2.39 insn per cycle + 3.196473636 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1956) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.467927e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.777181e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.777181e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.398564e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.676038e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.676038e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.912326 sec +TOTAL : 3.014863 sec INFO: No Floating Point Exceptions have been reported - 8,373,944,597 cycles # 2.872 GHz - 15,754,624,876 instructions # 1.88 insn per cycle - 2.917669267 seconds time elapsed + 8,484,963,512 cycles # 2.808 GHz + 15,797,579,651 instructions # 1.86 insn per cycle + 3.022063297 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2564) (512y: 0) (512z: 0) 
------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.499171e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.843892e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.843892e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.428672e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.765186e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.765186e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.880358 sec +TOTAL : 2.974553 sec INFO: No Floating Point Exceptions have been reported - 8,253,904,909 cycles # 2.861 GHz - 15,614,471,146 instructions # 1.89 insn per cycle - 2.885437824 seconds time elapsed + 8,386,796,916 cycles # 2.814 GHz + 15,657,891,969 instructions # 1.87 insn per cycle + 2.981555653 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2467) (512y: 12) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.523885e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.854007e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.854007e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.445697e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.728486e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.728486e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.856554 sec +TOTAL : 2.962981 sec INFO: No Floating Point Exceptions have been reported - 6,625,726,004 cycles # 2.316 GHz - 12,865,251,919 instructions # 1.94 insn per cycle - 2.861676497 seconds time elapsed + 6,681,340,868 cycles # 2.251 GHz + 12,907,012,003 instructions # 1.93 insn per cycle + 2.970184514 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1697) (512y: 17) (512z: 1440) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052585973637E-002 Relative difference = 2.0158743040564767e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt index fb5ea9b7ec..6589d6b6fa 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd0_rmbhst.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,60 +11,60 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-01_23:48:43 +DATE: 2024-09-15_11:52:21 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 12 --rmbhst OMP= WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.018326e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.328781e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.289875e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.003631e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.641927e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.671119e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371710e-02 +- 3.270389e-06 ) GeV^0 -TOTAL : 1.485303 sec +TOTAL : 1.488823 sec INFO: No Floating Point Exceptions have been reported - 5,030,861,863 cycles # 2.949 GHz - 9,228,129,648 instructions # 1.83 insn per cycle - 1.764637764 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst + 4,967,613,517 cycles # 2.899 GHz + 9,171,831,308 instructions # 1.85 insn per cycle + 1.769936667 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -72,33 +72,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 Avg ME (F77/GPU) = 1.2828112125134794E-002 Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.101305e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.300250e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.300250e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.073262e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.267974e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.267974e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.075575 sec +TOTAL : 6.250612 sec INFO: No Floating Point Exceptions have been reported - 18,237,738,264 cycles # 3.000 GHz - 45,006,180,430 instructions # 2.47 insn per cycle - 6.080723981 seconds time elapsed + 18,322,110,309 cycles # 2.929 GHz + 45,051,388,062 instructions # 2.46 insn per cycle + 6.257775360 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 411) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -106,31 +106,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039854866802E-002 Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.297930e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.508269e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.508269e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.247339e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.424409e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.424409e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.101909 sec +TOTAL : 3.190677 sec INFO: No Floating Point Exceptions have been reported - 9,293,555,965 cycles # 2.992 GHz - 22,286,993,678 instructions # 2.40 insn per cycle - 3.107037940 seconds time elapsed + 9,359,387,266 cycles # 2.928 GHz + 22,331,498,291 instructions # 2.39 insn per cycle + 3.197654484 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1956) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
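The "Internal loops fptype_sv" labels encode the SIMD lane count: in these float ('f') builds a vector register holds register_bits/32 single-precision lanes, which gives exactly the VECTOR[4]/VECTOR[8]/VECTOR[16] progression reported for sse4, avx2/512y and 512z. A small illustration ('512y' denotes AVX512 code restricted to 256-bit registers, as the log itself notes):

# Lane counts behind the VECTOR[n] labels in the float builds
for backend, bits in [("sse4", 128), ("avx2", 256), ("512y", 256), ("512z", 512)]:
    print(f"{backend}: {bits}-bit / 32-bit floats -> VECTOR[{bits // 32}]")
# sse4: VECTOR[4], avx2: VECTOR[8], 512y: VECTOR[8], 512z: VECTOR[16]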
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -138,31 +138,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.452818e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.733465e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.733465e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.363785e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.679448e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.679448e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.926757 sec +TOTAL : 3.047896 sec INFO: No Floating Point Exceptions have been reported - 8,366,818,253 cycles # 2.855 GHz - 15,756,195,031 instructions # 1.88 insn per cycle - 2.931860697 seconds time elapsed + 8,583,132,130 cycles # 2.811 GHz + 15,806,350,534 instructions # 1.84 insn per cycle + 3.054826008 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2564) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
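The "Relative difference" figure in each cmpExe stanza is consistent with |a - b| / |b| applied to the two average MEs; checking it against the sse4 numbers above (the formula is inferred from the printed values, not read from the comparison code):

me_cpp = 1.282804e-02                # Avg ME (C++/C++), printed to 7 digits
me_f77 = 1.2828039280066150e-02      # Avg ME (F77/C++)
rel = abs(me_cpp - me_f77) / abs(me_f77)
print(rel)                           # ~5.612189e-08, matching the log
assert rel <= 5e-3                   # the "OK (relative difference <= 5E-3)" gate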
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -170,31 +170,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.524375e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.884976e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.884976e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.430638e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.771124e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.771124e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.852545 sec +TOTAL : 2.974287 sec INFO: No Floating Point Exceptions have been reported - 8,237,825,361 cycles # 2.884 GHz - 15,608,755,520 instructions # 1.89 insn per cycle - 2.857595702 seconds time elapsed + 8,401,059,167 cycles # 2.818 GHz + 15,651,581,046 instructions # 1.86 insn per cycle + 2.981875735 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2467) (512y: 12) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -202,31 +202,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 12 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.526966e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.890608e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.890608e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.438697e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.736996e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.736996e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.855239 sec +TOTAL : 2.973534 sec INFO: No Floating Point Exceptions have been reported - 6,638,810,786 cycles # 2.322 GHz - 12,863,885,814 instructions # 1.94 insn per cycle - 2.860360436 seconds time elapsed + 6,722,109,548 cycles # 2.256 GHz + 12,906,606,049 instructions # 1.92 insn per cycle + 2.981153680 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1697) (512y: 17) (512z: 1440) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
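In the perf stanzas, "insn per cycle" is just the ratio of the two counters above it, and the GHz figure is cycles over the task clock, which is why it tracks but does not exactly equal cycles over the elapsed time. The distinctly lower ~2.26 GHz in the 512z rows (vs ~2.9 GHz elsewhere) is plausibly AVX-512 frequency reduction on this Xeon Silver 4216; the log itself does not say. Checking the 512z numbers above:

cycles, instructions = 6_722_109_548, 12_906_606_049
print(f"{instructions / cycles:.2f} insn per cycle")  # 1.92, as reported
print(f"{cycles / 2.981153680 / 1e9:.3f} GHz")        # 2.255, near the reported 2.256
# (perf normalises by task-clock, not elapsed wall time, so this is approximate)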
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -234,8 +234,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052585973637E-002 Relative difference = 2.0158743040564767e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt index 00eaba46ff..bed528f6e7 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-01_23:12:35 +DATE: 2024-09-15_11:09:33 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.447555e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.168245e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.316057e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.185244e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.645179e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.802907e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.567766 sec +TOTAL : 0.581560 sec INFO: No Floating Point Exceptions have been reported - 2,287,776,846 cycles # 2.893 GHz - 3,650,539,912 instructions # 1.60 insn per cycle - 0.849995377 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe 
-p 2048 256 1 + 2,321,713,730 cycles # 2.869 GHz + 3,648,873,879 instructions # 1.57 insn per cycle + 0.865547071 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 79 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 Avg ME (F77/GPU) = 1.2828112125134794E-002 Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.093159e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.289183e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.289183e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.067642e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.262437e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.262437e+06 ) sec^-1 MeanMatrixElemValue = ( 
1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 6.118530 sec +TOTAL : 6.278398 sec INFO: No Floating Point Exceptions have been reported - 18,272,965,628 cycles # 2.985 GHz - 44,981,058,442 instructions # 2.46 insn per cycle - 6.123441325 seconds time elapsed + 18,394,901,899 cycles # 2.927 GHz + 45,013,341,285 instructions # 2.45 insn per cycle + 6.286516700 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 397) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039854866802E-002 Relative difference = 1.1313746984080878e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.227466e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.393043e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.393043e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.249815e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.432877e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.432877e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 3.201221 sec +TOTAL : 3.188261 sec INFO: No Floating Point Exceptions have been reported - 9,275,282,455 cycles # 2.894 GHz - 22,254,657,541 instructions # 2.40 insn per cycle - 3.206036710 seconds time elapsed + 9,382,779,388 cycles # 2.937 GHz + 22,291,184,899 instructions # 2.38 insn per 
cycle + 3.196123670 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1939) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039280066150E-002 Relative difference = 5.612189004572479e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.467502e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.773696e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.773696e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.394804e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.683014e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.683014e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.910424 sec +TOTAL : 3.016616 sec INFO: No Floating Point Exceptions have been reported - 8,366,603,031 cycles # 2.871 GHz - 15,747,210,395 instructions # 1.88 insn per cycle - 2.915453077 seconds time elapsed + 8,501,260,075 cycles # 2.812 GHz + 15,791,303,131 instructions # 1.86 insn per cycle + 3.024850695 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2539) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.509019e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.861570e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.861570e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.433401e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.784502e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.784502e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.868356 sec +TOTAL : 2.973425 sec INFO: No Floating Point Exceptions have been reported - 8,253,227,438 cycles # 2.873 GHz - 15,590,446,238 instructions # 1.89 insn per cycle - 2.873277360 seconds time elapsed + 8,414,276,106 cycles # 2.823 GHz + 15,633,261,481 instructions # 1.86 insn per cycle + 2.981340876 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2436) (512y: 12) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
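Comparing the ncu lines across the hrd0 and hrd1 halves of this diff: sigmaKin reports launch__registers_per_thread 109 with hardcodePARAM=0 but 79 with hardcodePARAM=1, consistent with hardcoded physics parameters being folded into immediates instead of living in registers. A rough occupancy bound (the 65,536 registers/SM is the V100 hardware limit, assumed here; real occupancy is further quantised by warp and allocation granularity):

regs_per_sm = 65_536                 # V100 register file per SM (assumed, not from the log)
for tag, regs in [("hrd0", 109), ("hrd1", 79)]:
    print(tag, "->", regs_per_sm // regs, "threads/SM upper bound")
# hrd0 -> 601, hrd1 -> 829: more headroom for hrd1, though the throughput rows
# here show only a modest difference for this simple process.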
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053255361738E-002 Relative difference = 2.5376902468575066e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.537998e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.900122e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.900122e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.449856e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.750896e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.750896e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0 -TOTAL : 2.844576 sec +TOTAL : 2.962493 sec INFO: No Floating Point Exceptions have been reported - 6,605,768,385 cycles # 2.322 GHz - 12,840,284,625 instructions # 1.94 insn per cycle - 2.849472031 seconds time elapsed + 6,702,761,235 cycles # 2.257 GHz + 12,885,740,598 instructions # 1.92 insn per cycle + 2.970728824 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1667) (512y: 18) (512z: 1428) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
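One more pattern worth noting in these float logs: MeanMatrixElemValue shifts in its last digits between lane counts (3.270376e-06 for none/sse4 vs 3.270341e-06 for avx2/512y above), which is the expected footprint of non-associative float addition when partial sums are reduced in a different order per SIMD width. The effect in miniature:

# Float addition is not associative, so different reduction orders move the
# last digits -- harmless here, as the cmpExe cross-checks above confirm.
a = (0.1 + 0.2) + 0.3
b = 0.1 + (0.2 + 0.3)
print(a == b, a, b)                  # False 0.6000000000000001 0.6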
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828052564145764E-002 Relative difference = 1.9988585667912256e-07 diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt index f0525a51b6..711fbf3a50 100644 --- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum' -DATE: 2024-09-01_23:33:04 +DATE: 2024-09-15_11:34:45 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.346385e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.706042e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.743361e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.272057e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.453757e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.411368e+08 ) sec^-1 MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0 -TOTAL : 0.570463 sec +TOTAL : 0.577320 sec INFO: No Floating Point Exceptions have been reported - 2,331,559,602 cycles # 2.930 GHz - 3,691,953,100 instructions # 1.58 insn per cycle - 0.853068830 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe 
-p 2048 256 1 + 2,322,367,280 cycles # 2.881 GHz + 3,616,476,077 instructions # 1.56 insn per cycle + 0.862621614 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 109 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.282802e-02 Avg ME (F77/GPU) = 1.2828112125134794E-002 Relative difference = 7.1815552823662555e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.672409e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.178714e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.178714e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.635657e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.129532e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.129532e+06 ) sec^-1 MeanMatrixElemValue = ( 
1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 4.122248 sec +TOTAL : 4.218771 sec INFO: No Floating Point Exceptions have been reported - 12,193,246,018 cycles # 2.955 GHz - 32,297,778,851 instructions # 2.65 insn per cycle - 4.127366635 seconds time elapsed + 12,191,913,623 cycles # 2.887 GHz + 32,293,306,178 instructions # 2.65 insn per cycle + 4.224304323 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 290) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039840314887E-002 Relative difference = 1.244813035273009e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.726003e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.613506e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.613506e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.654215e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.464911e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.464911e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0 -TOTAL : 2.677890 sec +TOTAL : 2.746425 sec INFO: No Floating Point Exceptions have been reported - 7,975,411,172 cycles # 2.973 GHz - 18,722,844,123 instructions # 2.35 insn per cycle - 2.683225038 seconds time elapsed + 8,013,864,577 cycles # 2.914 GHz + 18,725,751,725 instructions # 2.34 insn per 
cycle + 2.751635696 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1548) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039283704129E-002 Relative difference = 5.583829420356249e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.810407e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.657488e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.657488e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.734762e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.516819e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.516819e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.603296 sec +TOTAL : 2.676523 sec INFO: No Floating Point Exceptions have been reported - 7,478,624,708 cycles # 2.868 GHz - 14,257,879,880 instructions # 1.91 insn per cycle - 2.608417178 seconds time elapsed + 7,476,186,846 cycles # 2.791 GHz + 14,257,923,546 instructions # 1.91 insn per cycle + 2.682062632 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2237) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282805e-02 Avg ME (F77/C++) = 1.2828053244447801E-002 Relative difference = 2.5291823782248813e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.894749e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.863066e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.863066e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.834242e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.778618e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.778618e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0 -TOTAL : 2.537757 sec +TOTAL : 2.592675 sec INFO: No Floating Point Exceptions have been reported - 7,332,061,078 cycles # 2.884 GHz - 13,960,145,042 instructions # 1.90 insn per cycle - 2.542891951 seconds time elapsed + 7,344,696,907 cycles # 2.828 GHz + 13,952,931,831 instructions # 1.90 insn per cycle + 2.598198803 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2087) (512y: 3) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
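For readers decoding the build directory names that repeat throughout these logs: the pattern can be read off the run headers (e.g. build.cuda_f_inl1_hrd0 runs report [inlineHel=1] [hardcodePARAM=0] at FLOAT precision). A sketch of the decoding; the 'd'/'m' fptype values are assumed by analogy and do not appear in this file:

def decode_build_tag(tag: str) -> dict:
    # e.g. "build.512z_f_inl1_hrd0" -> backend / fptype / inlineHel / hardcodePARAM
    backend, fp, inl, hrd = tag.removeprefix("build.").split("_")
    fptypes = {"f": "FLOAT", "d": "DOUBLE", "m": "MIXED"}  # 'd'/'m' assumed
    return {"backend": backend, "fptype": fptypes[fp],
            "inlineHel": int(inl.removeprefix("inl")),
            "hardcodePARAM": int(hrd.removeprefix("hrd"))}

print(decode_build_tag("build.512z_f_inl1_hrd0"))
# {'backend': '512z', 'fptype': 'FLOAT', 'inlineHel': 1, 'hardcodePARAM': 0}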
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282805e-02
Avg ME (F77/C++) = 1.2828053244447801E-002
Relative difference = 2.5291823782248813e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 2.593845e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.034171e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.034171e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.491060e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.875896e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.875896e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0
-TOTAL : 2.793516 sec
+TOTAL : 2.903149 sec
INFO: No Floating Point Exceptions have been reported
- 6,517,481,617 cycles # 2.329 GHz
- 13,435,057,935 instructions # 2.06 insn per cycle
- 2.798703190 seconds time elapsed
+ 6,571,286,194 cycles # 2.260 GHz
+ 13,433,545,963 instructions # 2.04 insn per cycle
+ 2.908820313 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2077) (512y: 1) (512z: 1199)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282805e-02
Avg ME (F77/C++) = 1.2828052562326775E-002
Relative difference = 1.997440588685788e-07
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt
index bd20a7074b..6fc527ffa1 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_f_inl1_hrd1.txt
@@ -1,5 +1,5 @@
-Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
+Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
BACKEND=cpp512y (was cppauto)
OMPFLAGS=
FPTYPE='d'
@@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h
make: Nothing to be done for 'gtestlibs'.
make USEBUILDDIR=1 BACKEND=cuda
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cppnone
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cppsse4
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cppavx2
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cpp512z
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
-DATE: 2024-09-01_23:33:27
+DATE: 2024-09-15_11:35:09
On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1]
Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 1.351814e+08 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.868103e+08 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.009929e+08 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.289380e+08 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.618768e+08 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.817817e+08 ) sec^-1
MeanMatrixElemValue = ( 1.371687e-02 +- 3.270220e-06 ) GeV^0
-TOTAL : 0.574031 sec
+TOTAL : 0.580301 sec
INFO: No Floating Point Exceptions have been reported
- 2,336,591,618 cycles # 2.929 GHz
- 3,652,107,547 instructions # 1.56 insn per cycle
- 0.856248359 seconds time elapsed
-runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1
+ 2,323,629,755 cycles # 2.848 GHz
+ 3,593,641,981 instructions # 1.55 insn per cycle
+ 0.873274895 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1
==PROF== Profiling "sigmaKin": launch__registers_per_thread 79
==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/runTest_cuda.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/runTest_cuda.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2
Avg ME (C++/GPU) = 1.282802e-02
Avg ME (F77/GPU) = 1.2828112125134794E-002
Relative difference = 7.1815552823662555e-06
OK (relative difference <= 5E-3)
=========================================================================
-Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe
+Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_f_inl1_hrd1/check_hip.exe
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME] (23) = ( 2.220207e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.240567e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.240567e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.209448e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.220446e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.220446e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0
-TOTAL : 3.196282 sec
+TOTAL : 3.220192 sec
INFO: No Floating Point Exceptions have been reported
- 9,413,750,643 cycles # 2.941 GHz
- 25,704,588,109 instructions # 2.73 insn per cycle
- 3.201438558 seconds time elapsed
+ 9,366,774,041 cycles # 2.905 GHz
+ 25,702,432,609 instructions # 2.74 insn per cycle
+ 3.225730639 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 243) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039838495897E-002
Relative difference = 1.2589928273811243e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 3.045043e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.666844e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.666844e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 3.014104e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.557363e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.557363e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371707e-02 +- 3.270376e-06 ) GeV^0
-TOTAL : 2.433842 sec
+TOTAL : 2.462960 sec
INFO: No Floating Point Exceptions have been reported
- 7,255,793,129 cycles # 2.976 GHz
- 16,893,479,508 instructions # 2.33 insn per cycle
- 2.438946558 seconds time elapsed
+ 7,216,847,131 cycles # 2.925 GHz
+ 16,891,846,951 instructions # 2.34 insn per cycle
+ 2.468502980 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 1350) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039280066150E-002
Relative difference = 5.612189004572479e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 2.990768e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.128430e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.128430e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.924187e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.020326e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.020326e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0
-TOTAL : 2.464869 sec
+TOTAL : 2.522608 sec
INFO: No Floating Point Exceptions have been reported
- 7,151,311,767 cycles # 2.896 GHz
- 13,635,278,053 instructions # 1.91 insn per cycle
- 2.470013945 seconds time elapsed
+ 7,150,122,380 cycles # 2.829 GHz
+ 13,633,449,373 instructions # 1.91 insn per cycle
+ 2.528205937 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2061) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282805e-02
Avg ME (F77/C++) = 1.2828053220800939E-002
Relative difference = 2.5107486628541925e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 3.053885e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.294288e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.294288e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.976818e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.175866e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.175866e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270341e-06 ) GeV^0
-TOTAL : 2.422093 sec
+TOTAL : 2.485267 sec
INFO: No Floating Point Exceptions have been reported
- 7,026,614,378 cycles # 2.896 GHz
- 13,450,065,466 instructions # 1.91 insn per cycle
- 2.427233834 seconds time elapsed
+ 7,047,642,186 cycles # 2.830 GHz
+ 13,442,931,038 instructions # 1.91 insn per cycle
+ 2.490839699 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1940) (512y: 4) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282805e-02
Avg ME (F77/C++) = 1.2828053220800939E-002
Relative difference = 2.5107486628541925e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 2.692384e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.295789e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.295789e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.604837e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.103202e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.103202e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270342e-06 ) GeV^0
-TOTAL : 2.703935 sec
+TOTAL : 2.790744 sec
INFO: No Floating Point Exceptions have been reported
- 6,330,095,085 cycles # 2.337 GHz
- 13,164,073,111 instructions # 2.08 insn per cycle
- 2.709192314 seconds time elapsed
+ 6,349,721,778 cycles # 2.272 GHz
+ 13,164,680,615 instructions # 2.07 insn per cycle
+ 2.796235299 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2033) (512y: 1) (512z: 1085)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282805e-02
Avg ME (F77/C++) = 1.2828052536860923E-002
Relative difference = 1.977588895209662e-07
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt
index 9e3c172345..caa67d1a4c 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd0.txt
@@ -1,5 +1,5 @@
-Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
+Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
BACKEND=cpp512y (was cppauto)
OMPFLAGS=
FPTYPE='d'
@@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h
make: Nothing to be done for 'gtestlibs'.
make USEBUILDDIR=1 BACKEND=cuda
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cppnone
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cppsse4
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cppavx2
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cpp512z
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
-DATE: 2024-09-01_23:13:01
+DATE: 2024-09-15_11:10:00
On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 7.431285e+07 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.767514e+08 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.933255e+08 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 6.610039e+07 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.567106e+08 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.762593e+08 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 0.658633 sec
+TOTAL : 0.688497 sec
INFO: No Floating Point Exceptions have been reported
- 2,599,271,297 cycles # 2.919 GHz
- 4,048,239,508 instructions # 1.56 insn per cycle
- 0.950222116 seconds time elapsed
-runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1
+ 2,665,329,968 cycles # 2.845 GHz
+ 4,055,682,627 instructions # 1.52 insn per cycle
+ 0.995154695 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1
==PROF== Profiling "sigmaKin": launch__registers_per_thread 166
==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/runTest_cuda.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/runTest_cuda.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2
Avg ME (C++/GPU) = 1.282804e-02
Avg ME (F77/GPU) = 1.2828039901590279E-002
Relative difference = 7.671454200650844e-09
OK (relative difference <= 5E-3)
=========================================================================
-Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe
+Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd0/check_hip.exe
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME] (23) = ( 1.034382e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.205392e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.205392e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.002881e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.168467e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.168467e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 6.484958 sec
+TOTAL : 6.726627 sec
INFO: No Floating Point Exceptions have been reported
- 19,390,681,434 cycles # 2.989 GHz
- 46,278,360,693 instructions # 2.39 insn per cycle
- 6.489877171 seconds time elapsed
+ 19,724,365,546 cycles # 2.928 GHz
+ 46,388,641,620 instructions # 2.35 insn per cycle
+ 6.737968541 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 466) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039952548879E-002
Relative difference = 3.6990156841838714e-09
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 1.649240e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.184330e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.184330e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.617185e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.143896e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.143896e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 4.219438 sec
+TOTAL : 4.347629 sec
INFO: No Floating Point Exceptions have been reported
- 12,631,065,383 cycles # 2.991 GHz
- 31,480,470,322 instructions # 2.49 insn per cycle
- 4.224490541 seconds time elapsed
+ 12,771,945,524 cycles # 2.931 GHz
+ 31,577,972,239 instructions # 2.47 insn per cycle
+ 4.359278192 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 1719) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039952548879E-002
Relative difference = 3.6990156841838714e-09
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 2.008639e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.799566e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.799566e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.943978e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.720569e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.720569e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 3.536691 sec
+TOTAL : 3.688531 sec
INFO: No Floating Point Exceptions have been reported
- 10,084,304,672 cycles # 2.849 GHz
- 19,466,113,659 instructions # 1.93 insn per cycle
- 3.541680504 seconds time elapsed
+ 10,322,037,008 cycles # 2.790 GHz
+ 19,570,801,424 instructions # 1.90 insn per cycle
+ 3.699996508 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2042) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039951670679E-002
Relative difference = 3.767475112924841e-09
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 2.056481e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.878077e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.878077e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.986657e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.789520e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.789520e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 3.458920 sec
+TOTAL : 3.618212 sec
INFO: No Floating Point Exceptions have been reported
- 9,912,300,699 cycles # 2.862 GHz
- 19,210,733,007 instructions # 1.94 insn per cycle
- 3.464025045 seconds time elapsed
+ 10,149,499,266 cycles # 2.797 GHz
+ 19,312,096,557 instructions # 1.90 insn per cycle
+ 3.629679706 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1785) (512y: 189) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039951670679E-002
Relative difference = 3.767475112924841e-09
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 1.834847e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.462926e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.462926e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.777391e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.385382e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.385382e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 3.837142 sec
+TOTAL : 3.992684 sec
INFO: No Floating Point Exceptions have been reported
- 8,397,548,989 cycles # 2.187 GHz
- 15,057,354,786 instructions # 1.79 insn per cycle
- 3.842236435 seconds time elapsed
+ 8,588,251,503 cycles # 2.146 GHz
+ 15,161,251,122 instructions # 1.77 insn per cycle
+ 4.003805537 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 954) (512y: 154) (512z: 1322)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039951670679E-002
Relative difference = 3.767475112924841e-09
diff --git a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt
index 248fe7bdc2..ce1b16067d 100644
--- a/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_eemumu_mad/log_eemumu_mad_m_inl0_hrd1.txt
@@ -1,5 +1,5 @@
-Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
+Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum
BACKEND=cpp512y (was cppauto)
OMPFLAGS=
FPTYPE='d'
@@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h
make: Nothing to be done for 'gtestlibs'.
make USEBUILDDIR=1 BACKEND=cuda
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cppnone
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cppsse4
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cppavx2
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make USEBUILDDIR=1 BACKEND=cpp512z
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum'
-DATE: 2024-09-01_23:13:31
+DATE: 2024-09-15_11:10:30
On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 7.434537e+07 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.765664e+08 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.929797e+08 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 6.695377e+07 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.640031e+08 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.828039e+08 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 0.657503 sec
+TOTAL : 0.676129 sec
INFO: No Floating Point Exceptions have been reported
- 2,608,075,068 cycles # 2.928 GHz
- 4,002,537,795 instructions # 1.53 insn per cycle
- 0.948969404 seconds time elapsed
-runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1
+ 2,632,225,960 cycles # 2.883 GHz
+ 4,132,384,248 instructions # 1.57 insn per cycle
+ 0.970493332 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1
==PROF== Profiling "sigmaKin": launch__registers_per_thread 154
==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/runTest_cuda.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/runTest_cuda.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2
Avg ME (C++/GPU) = 1.282804e-02
Avg ME (F77/GPU) = 1.2828039901590279E-002
Relative difference = 7.671454200650844e-09
OK (relative difference <= 5E-3)
=========================================================================
-Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe
+Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.hip_m_inl0_hrd1/check_hip.exe
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME] (23) = ( 1.043875e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.213372e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.213372e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.005676e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.176899e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.176899e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 6.427915 sec
+TOTAL : 6.735465 sec
INFO: No Floating Point Exceptions have been reported
- 19,262,761,937 cycles # 2.996 GHz
- 46,214,044,642 instructions # 2.40 insn per cycle
- 6.432871980 seconds time elapsed
+ 19,720,225,593 cycles # 2.924 GHz
+ 46,326,489,596 instructions # 2.35 insn per cycle
+ 6.746197968 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 453) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 }
@@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.282804e-02
Avg ME (F77/C++) = 1.2828039952548879E-002
Relative difference = 3.6990156841838714e-09
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 1.660972e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.186103e+06 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.186103e+06 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.574820e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.144081e+06 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.144081e+06 ) sec^-1
MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0
-TOTAL : 4.195922 sec
+TOTAL : 4.449866 sec
INFO: No Floating Point Exceptions have been reported
- 12,533,967,278 cycles # 2.984 GHz
- 31,452,817,753 instructions # 2.51 insn per cycle
- 4.200889036 seconds time elapsed
+ 13,065,779,841 cycles # 2.930 GHz
+ 31,555,443,434 instructions # 2.42 insn
per cycle + 4.460852067 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1711) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039952548879E-002 Relative difference = 3.6990156841838714e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.945450e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.706622e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.706622e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.952135e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.730440e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.730440e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.643145 sec +TOTAL : 3.677079 sec INFO: No Floating Point Exceptions have been reported - 10,121,690,290 cycles # 2.776 GHz - 19,453,283,154 instructions # 1.92 insn per cycle - 3.648095738 seconds time elapsed + 10,320,566,663 cycles # 2.800 GHz + 19,557,785,526 instructions # 1.90 insn per cycle + 3.688245631 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2026) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest_cpp.exe 
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.066814e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.889937e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.889937e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.981919e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.782784e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.782784e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.441077 sec +TOTAL : 3.626089 sec INFO: No Floating Point Exceptions have been reported - 9,892,482,908 cycles # 2.871 GHz - 19,285,621,568 instructions # 1.95 insn per cycle - 3.446134033 seconds time elapsed + 10,150,645,903 cycles # 2.793 GHz + 19,388,040,023 instructions # 1.91 insn per cycle + 3.637342012 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1779) (512y: 189) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 12 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_EPEM_MUPMUM_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.898057e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.568308e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.568308e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.806136e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.449559e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.449559e+06 ) sec^-1 MeanMatrixElemValue = ( 1.371706e-02 +- 3.270315e-06 ) GeV^0 -TOTAL : 3.718166 sec +TOTAL : 3.931921 sec INFO: No Floating Point Exceptions have been reported - 8,231,795,227 cycles # 2.212 GHz - 14,974,124,019 instructions # 1.82 insn per cycle - 3.723482499 seconds time elapsed + 8,442,748,276 cycles # 2.150 GHz + 15,068,523,446 instructions # 1.78 insn per cycle + 3.943167549 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 947) (512y: 156) (512z: 1306) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 2 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 2 channels { 1 : 256, 2 : 256 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/ee_mumu.mad/SubProcesses/P1_epem_mupmum/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.282804e-02 Avg ME (F77/C++) = 1.2828039951670679E-002 Relative difference = 3.767475112924841e-09 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt index d2edd191ed..aeadfaae64 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-01_23:14:01 +DATE: 2024-09-15_11:11:01 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.670147e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.437270e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.004291e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.391981e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.330443e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.949573e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.523890 sec +TOTAL : 0.534465 sec INFO: No Floating Point Exceptions have been reported - 2,178,385,782 cycles # 2.885 GHz - 3,182,588,432 instructions # 1.46 insn per cycle - 0.812064572 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 + 2,210,485,527 cycles # 2.869 GHz + 3,136,829,588 instructions # 1.42 insn per cycle + 0.828432932 seconds 
time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 Avg ME (F77/GPU) = 2.0288063388516822 Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.858232e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.905852e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.905852e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.818281e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.865529e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.865529e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.747452 sec +TOTAL : 5.911600 sec INFO: No Floating Point Exceptions have been reported - 17,237,152,794 cycles # 2.997 GHz - 45,932,361,580 instructions # 
2.66 insn per cycle - 5.752612178 seconds time elapsed + 17,389,649,504 cycles # 2.935 GHz + 46,036,709,188 instructions # 2.65 insn per cycle + 5.925127688 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.186119e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.349184e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.349184e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.165855e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.325075e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.325075e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.399306 sec +TOTAL : 3.453922 sec INFO: No Floating Point Exceptions have been reported - 10,043,894,289 cycles # 2.951 GHz - 27,839,601,349 instructions # 2.77 insn per cycle - 3.404666318 seconds time elapsed + 10,171,046,914 cycles # 2.936 GHz + 27,937,548,503 instructions # 2.75 insn per cycle + 3.465600263 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2531) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.942338e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.320449e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.320449e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.967598e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.358232e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.358232e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.228364 sec +TOTAL : 2.252729 sec INFO: No Floating Point Exceptions have been reported - 6,109,581,175 cycles # 2.736 GHz - 12,581,401,053 instructions # 2.06 insn per cycle - 2.233705562 seconds time elapsed + 6,219,848,194 cycles # 2.748 GHz + 12,677,070,824 instructions # 2.04 insn per cycle + 2.263945260 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.484110e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.946408e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.946408e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.478036e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.948706e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.948706e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.018536 sec +TOTAL : 2.055945 sec INFO: No Floating Point Exceptions have been reported - 5,570,191,475 cycles # 2.753 GHz - 12,019,013,229 instructions # 2.16 insn per cycle - 2.023911725 seconds time elapsed + 5,693,440,562 cycles # 2.756 GHz + 12,116,317,958 instructions # 2.13 insn per cycle + 2.067013514 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2357) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.594274e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.785812e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.785812e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.483396e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.667202e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.667202e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.024112 sec +TOTAL : 3.151048 sec INFO: No Floating Point Exceptions have been reported - 5,693,875,340 cycles # 1.880 GHz - 8,293,914,748 instructions # 1.46 insn per cycle - 3.029595102 seconds time elapsed + 5,836,401,977 cycles # 1.846 GHz + 8,391,475,751 instructions # 1.44 insn per cycle + 3.162234928 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt index 23f45cdec4..9022013b0c 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_bridge.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,40 +11,40 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-01_23:42:43 +DATE: 2024-09-15_11:46:12 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -53,17 +53,17 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.493626e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.788449e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.788449e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.381047e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.782856e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.782856e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.815005 sec +TOTAL : 0.840023 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 3,127,275,476 cycles # 2.947 GHz - 4,836,149,999 instructions # 1.55 insn per cycle - 1.119465655 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge + 3,108,426,192 cycles # 2.861 GHz + 4,770,924,698 instructions # 1.53 insn per cycle + 1.146594198 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) @@ -71,7 +71,7 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -79,35 +79,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 Avg ME (F77/GPU) = 2.0288063388516822 Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.858044e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.905317e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.905317e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.806159e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.852377e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.852377e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.830025 sec +TOTAL : 6.067320 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 17,599,347,450 cycles # 3.016 GHz - 45,994,268,031 instructions # 2.61 insn per cycle - 5.836722741 seconds time elapsed + 17,856,813,573 cycles # 2.936 GHz + 46,243,571,751 instructions # 2.59 insn per cycle + 6.083398130 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -115,33 +115,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.211060e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.365706e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.365706e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.134236e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.289007e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.289007e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.451835 sec +TOTAL : 3.611222 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 10,389,093,510 cycles # 3.005 GHz - 28,024,409,034 instructions # 2.70 insn per cycle - 3.458495700 seconds time elapsed + 10,640,342,598 cycles # 2.934 GHz + 28,274,377,614 instructions # 2.66 insn per cycle + 3.627839941 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2531) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -149,33 +149,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.022670e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.402018e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.402018e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.918119e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.295386e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.295386e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.278536 sec +TOTAL : 2.398079 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 6,457,442,090 cycles # 2.827 GHz - 12,870,074,518 instructions # 1.99 insn per cycle - 2.285452391 seconds time elapsed + 6,685,318,792 cycles # 2.770 GHz + 13,122,453,026 instructions # 1.96 insn per cycle + 2.414568983 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -183,33 +183,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.538075e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.001427e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.001427e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.370026e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.821205e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.821205e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.080838 sec +TOTAL : 2.218649 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 5,931,949,770 cycles # 2.843 GHz - 12,306,854,953 instructions # 2.07 insn per cycle - 2.087528338 seconds time elapsed + 6,189,711,182 cycles # 2.770 GHz + 12,557,371,407 instructions # 2.03 insn per cycle + 2.235322482 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2357) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -217,33 +217,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.606488e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.794562e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.794562e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.469847e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.650575e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.650575e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.095535 sec +TOTAL : 3.290950 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 6,067,000,673 cycles # 1.956 GHz - 8,541,484,367 instructions # 1.41 insn per cycle - 3.102199651 seconds time elapsed + 6,320,487,640 cycles # 1.913 GHz + 8,791,643,966 instructions # 1.39 insn per cycle + 3.307886654 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -251,8 +251,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt index d505b87692..85f95aac4c 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_common.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-01_23:54:42 +DATE: 2024-09-15_11:58:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.236642e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.108245e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.774075e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.424217e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.466051e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.011391e+08 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 0.625400 sec +TOTAL : 0.646400 sec INFO: No Floating Point Exceptions have been reported - 2,500,275,084 cycles # 2.922 GHz - 3,660,372,041 instructions # 1.46 insn per cycle - 0.913293893 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common + 2,528,248,856 cycles # 2.879 GHz + 3,688,196,917 instructions # 1.46 insn per cycle + 0.934986871 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 Avg ME (F77/GPU) = 2.0288063388516822 Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.860204e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.908027e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.908027e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.819736e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.867152e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.867152e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 5.803551 sec +TOTAL : 5.985044 sec INFO: No Floating Point Exceptions have been reported - 17,410,566,389 cycles # 2.998 GHz - 45,948,240,516 instructions # 2.64 insn per cycle - 5.809030601 seconds time elapsed + 17,600,111,411 cycles # 2.936 GHz + 46,124,554,790 instructions # 2.62 insn per cycle + 5.995774241 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.242345e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.401455e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.401455e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.168097e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.326675e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.326675e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.399393 sec +TOTAL : 3.533047 sec INFO: No Floating Point Exceptions have been reported - 10,226,528,939 cycles # 3.004 GHz - 27,838,493,655 instructions # 2.72 insn per cycle - 3.404881515 seconds time elapsed + 10,405,596,477 cycles # 2.937 GHz + 28,016,084,485 instructions # 2.69 insn per cycle + 3.543840924 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2531) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.089523e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.488778e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.488778e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.000915e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.392912e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.392912e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.227519 sec +TOTAL : 2.319991 sec INFO: No Floating Point Exceptions have been reported - 6,296,693,700 cycles # 2.821 GHz - 12,563,741,360 instructions # 2.00 insn per cycle - 2.232914857 seconds time elapsed + 6,443,899,577 cycles # 2.766 GHz + 12,743,367,354 instructions # 1.98 insn per cycle + 2.330731733 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.492330e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.954969e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.954969e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.453355e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.917864e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.917864e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 2.078395 sec +TOTAL : 2.148765 sec INFO: No Floating Point Exceptions have been reported - 5,760,797,019 cycles # 2.765 GHz - 11,965,976,847 instructions # 2.08 insn per cycle - 2.084185413 seconds time elapsed + 5,965,699,512 cycles # 2.764 GHz + 12,146,978,501 instructions # 2.04 insn per cycle + 2.159407986 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2357) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.621135e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.815003e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.815003e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.499391e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.683844e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.683844e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079401e+00 +- 3.402993e-03 ) GeV^0 -TOTAL : 3.061718 sec +TOTAL : 3.221377 sec INFO: No Floating Point Exceptions have been reported - 5,891,546,452 cycles # 1.922 GHz - 8,243,365,807 instructions # 1.40 insn per cycle - 3.067136953 seconds time elapsed + 6,080,803,082 cycles # 1.882 GHz + 8,423,087,351 instructions # 1.39 insn per cycle + 3.232264502 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt index f1a7279f4b..a207bf6969 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_curhst.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-01_23:51:54 +DATE: 2024-09-15_11:55:37 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.273434e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.280412e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.968161e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.567312e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.455238e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.007252e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.562544 sec +TOTAL : 0.563061 sec INFO: No Floating Point Exceptions have been reported - 2,322,125,439 cycles # 2.928 GHz - 3,645,722,192 instructions # 1.57 insn per cycle - 0.850249349 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst + 2,285,403,688 cycles # 2.879 GHz + 3,587,140,832 instructions # 1.57 insn per 
cycle + 0.850548380 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 Avg ME (F77/GPU) = 2.0288063388516822 Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.858859e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.905786e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.905786e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.818247e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.865618e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.865618e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.747389 sec +TOTAL : 5.910910 sec INFO: No Floating Point Exceptions have been reported - 17,237,233,259 
cycles # 2.997 GHz - 45,935,224,991 instructions # 2.66 insn per cycle - 5.752803841 seconds time elapsed + 17,366,644,102 cycles # 2.933 GHz + 46,048,599,584 instructions # 2.65 insn per cycle + 5.921509733 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.234441e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.394817e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.394817e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.165029e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.323362e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.323362e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.347494 sec +TOTAL : 3.462860 sec INFO: No Floating Point Exceptions have been reported - 10,060,095,556 cycles # 3.001 GHz - 27,840,812,105 instructions # 2.77 insn per cycle - 3.352886847 seconds time elapsed + 10,189,100,613 cycles # 2.933 GHz + 27,958,354,355 instructions # 2.74 insn per cycle + 3.474989469 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2531) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- 
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.050411e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.453973e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.453973e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.979916e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.371493e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.371493e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.184644 sec +TOTAL : 2.251622 sec INFO: No Floating Point Exceptions have been reported - 6,160,511,677 cycles # 2.816 GHz - 12,582,281,977 instructions # 2.04 insn per cycle - 2.190146485 seconds time elapsed + 6,239,671,348 cycles # 2.759 GHz + 12,698,985,692 instructions # 2.04 insn per cycle + 2.262379977 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.619096e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.102203e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.102203e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.471204e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.935728e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.935728e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.971941 sec +TOTAL : 2.061624 sec INFO: No Floating Point Exceptions have been reported - 5,581,458,565 cycles # 2.824 GHz - 12,016,750,817 instructions # 2.15 insn per cycle - 1.977376660 seconds time elapsed + 5,721,552,525 cycles # 2.762 GHz + 12,134,935,244 instructions # 2.12 insn per cycle + 2.072280515 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2357) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.649577e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.841164e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.841164e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.500472e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.684328e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.684328e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.978936 sec +TOTAL : 3.142449 sec INFO: No Floating Point Exceptions have been reported - 5,694,372,557 cycles # 1.909 GHz - 8,293,954,373 instructions # 1.46 insn per cycle - 2.984356046 seconds time elapsed + 5,837,185,756 cycles # 1.853 GHz + 8,411,639,410 instructions # 1.44 insn per cycle + 3.153001098 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt index c659726d0d..fa08fbada3 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd0_rmbhst.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,60 +11,60 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-01_23:49:10 +DATE: 2024-09-15_11:52:49 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 --rmbhst OMP= WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.858498e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.254024e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.935929e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.726431e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.401767e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.003069e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.707983 sec +TOTAL : 0.719407 sec INFO: No Floating Point Exceptions have been reported - 2,759,086,933 cycles # 2.941 GHz - 4,352,636,424 instructions # 1.58 insn per cycle - 0.995067409 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst + 2,718,962,196 cycles # 2.853 GHz + 4,261,744,999 instructions # 1.57 insn per cycle + 1.009279183 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -72,33 +72,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 Avg ME (F77/GPU) = 2.0288063388516822 Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.826998e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.873338e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.873338e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.804236e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.851320e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.851320e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.846045 sec +TOTAL : 5.958188 sec INFO: No Floating Point Exceptions have been reported - 17,230,561,081 cycles # 2.945 GHz - 45,931,993,548 instructions # 2.67 insn per cycle - 5.851455306 seconds time elapsed + 17,386,557,790 cycles # 2.923 GHz + 46,053,036,463 instructions # 2.65 insn per cycle + 5.968882862 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
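Each runExe invocation processes blocks x threads x iterations events (here 2048 x 256 x 2 = 1048576), and the EvtsPerSec figures are throughputs over the internally timed sections, which is why they come out somewhat above total events divided by the TOTAL wall time. A short sketch of that arithmetic for the scalar 'none' run above; reading the -p arguments as blocks, threads and iterations is an assumption:

  # Relating the -p grid to the EvtsPerSec throughput figures.
  # "-p 2048 256 2" read as blocks * threads * iterations (assumption).
  blocks, threads, iterations = 2048, 256, 2
  nevt = blocks * threads * iterations      # 1048576 events
  total = 5.958188                          # TOTAL of the 'none' run (s)
  print(f"naive nevt/TOTAL : {nevt / total:.6e} /s")   # ~1.76e+05
  # The log reports EvtsPerSec[MatrixElems] = 1.851320e+05 /s, higher
  # than the naive figure because only the ME sections are timed.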
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -106,31 +106,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.244950e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.403701e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.403701e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.150158e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.308857e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.308857e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.336054 sec +TOTAL : 3.477756 sec INFO: No Floating Point Exceptions have been reported - 10,036,104,723 cycles # 3.004 GHz - 27,841,427,493 instructions # 2.77 insn per cycle - 3.341549572 seconds time elapsed + 10,179,732,087 cycles # 2.919 GHz + 27,956,952,229 instructions # 2.75 insn per cycle + 3.488776572 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2531) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -138,31 +138,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.088493e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.483787e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.483787e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.931728e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.313640e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.313640e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.165858 sec +TOTAL : 2.272950 sec INFO: No Floating Point Exceptions have been reported - 6,085,227,169 cycles # 2.804 GHz - 12,581,161,598 instructions # 2.07 insn per cycle - 2.171236876 seconds time elapsed + 6,250,280,118 cycles # 2.738 GHz + 12,699,256,189 instructions # 2.03 insn per cycle + 2.283692354 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2619) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -170,31 +170,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.560737e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.029058e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.029058e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.454076e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.923276e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.923276e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 1.990270 sec +TOTAL : 2.069149 sec INFO: No Floating Point Exceptions have been reported - 5,638,832,491 cycles # 2.827 GHz - 12,018,376,967 instructions # 2.13 insn per cycle - 1.995707338 seconds time elapsed + 5,725,799,884 cycles # 2.754 GHz + 12,135,179,967 instructions # 2.12 insn per cycle + 2.079783939 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2357) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -202,31 +202,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.665545e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.859718e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.859718e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.414550e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.592343e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.592343e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.965370 sec +TOTAL : 3.218110 sec INFO: No Floating Point Exceptions have been reported - 5,714,700,961 cycles # 1.924 GHz - 8,293,609,502 instructions # 1.45 insn per cycle - 2.970791328 seconds time elapsed + 5,959,079,250 cycles # 1.847 GHz + 8,422,189,176 instructions # 1.41 insn per cycle + 3.228977501 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1441) (512y: 122) (512z: 1802) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
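The counter triplets in these logs follow perf-stat conventions: the quoted GHz is, to a good approximation, cycles divided by elapsed time, and "insn per cycle" is instructions divided by cycles. A quick consistency check on the 512z numbers above (the counters are normalised by their active time rather than total wall time, so the clock figure matches only approximately):

  # Consistency check of the hardware-counter lines for the 512z run.
  cycles = 5_959_079_250
  instructions = 8_422_189_176
  elapsed = 3.228977501                       # seconds time elapsed
  print(f"{cycles / elapsed / 1e9:.3f} GHz")  # ~1.845 vs printed 1.847
  print(f"{instructions / cycles:.2f} insn per cycle")  # 1.41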
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -234,8 +234,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt index f09bf18ae6..328467ef63 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-01_23:14:25 +DATE: 2024-09-15_11:11:27 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.636159e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.308317e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.889429e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.368553e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.328924e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.965685e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.522612 sec +TOTAL : 0.531897 sec INFO: No Floating Point Exceptions have been reported - 2,180,477,207 cycles # 2.895 GHz - 3,172,870,054 instructions # 1.46 insn per cycle - 0.810836216 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 + 2,204,349,816 cycles # 2.864 GHz + 3,169,634,690 instructions # 1.44 insn per cycle + 0.825649601 seconds 
time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 Avg ME (F77/GPU) = 2.0288063388516822 Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.897784e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.948555e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.948555e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.870268e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.919754e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.919754e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.630375 sec +TOTAL : 5.735202 sec INFO: No Floating Point Exceptions have been reported - 16,757,601,134 cycles # 2.974 GHz - 44,924,975,083 instructions # 
2.68 insn per cycle - 5.635488054 seconds time elapsed + 16,847,713,247 cycles # 2.933 GHz + 44,981,738,957 instructions # 2.67 insn per cycle + 5.744748484 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 567) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.395050e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.569829e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.569829e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.300705e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.473658e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.473658e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.193638 sec +TOTAL : 3.304655 sec INFO: No Floating Point Exceptions have been reported - 9,562,770,661 cycles # 2.990 GHz - 26,698,506,537 instructions # 2.79 insn per cycle - 3.198859598 seconds time elapsed + 9,659,083,497 cycles # 2.916 GHz + 26,749,720,361 instructions # 2.77 insn per cycle + 3.314062418 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2328) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.695250e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.026786e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.026786e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.583161e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.907950e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.907950e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.337651 sec +TOTAL : 2.415079 sec INFO: No Floating Point Exceptions have been reported - 6,614,817,603 cycles # 2.825 GHz - 14,119,225,912 instructions # 2.13 insn per cycle - 2.342792443 seconds time elapsed + 6,675,336,151 cycles # 2.753 GHz + 14,174,925,457 instructions # 2.12 insn per cycle + 2.425332683 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2710) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.825490e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.171719e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.171719e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.674600e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.017717e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.017717e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.278000 sec +TOTAL : 2.378233 sec INFO: No Floating Point Exceptions have been reported - 6,343,251,608 cycles # 2.779 GHz - 13,705,736,126 instructions # 2.16 insn per cycle - 2.283174093 seconds time elapsed + 6,574,155,578 cycles # 2.754 GHz + 13,789,180,928 instructions # 2.10 insn per cycle + 2.388565062 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2356) (512y: 297) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.324436e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.489374e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.489374e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.383516e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.554686e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.554686e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.260583 sec +TOTAL : 3.229855 sec INFO: No Floating Point Exceptions have been reported - 5,903,112,082 cycles # 1.808 GHz - 10,062,261,188 instructions # 1.70 insn per cycle - 3.265855245 seconds time elapsed + 5,994,389,719 cycles # 1.851 GHz + 10,123,629,860 instructions # 1.69 insn per cycle + 3.240029027 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1273) (512y: 208) (512z: 1988) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt index c4737ae009..2da881e2b3 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-01_23:33:49 +DATE: 2024-09-15_11:35:31 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.441577e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.233882e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.865376e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.302471e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.316570e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.001729e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.524909 sec +TOTAL : 0.535501 sec INFO: No Floating Point Exceptions have been reported - 2,204,501,755 cycles # 2.906 GHz - 3,161,352,230 instructions # 1.43 insn per cycle - 0.815153775 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 + 2,215,078,191 cycles # 2.874 GHz + 3,154,679,095 instructions # 1.42 insn per cycle + 0.829146554 seconds 
time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 Avg ME (F77/GPU) = 2.0288063388516822 Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.482527e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.569569e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.569569e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.351910e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.430033e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.430033e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.328984 sec +TOTAL : 4.571525 sec INFO: No Floating Point Exceptions have been reported - 13,010,239,001 cycles # 3.002 GHz - 34,342,983,400 instructions # 
2.64 insn per cycle - 4.334635297 seconds time elapsed + 13,035,401,690 cycles # 2.848 GHz + 34,355,905,973 instructions # 2.64 insn per cycle + 4.578322526 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 665) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.026227e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.165110e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.165110e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.974205e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.110138e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.110138e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.570531 sec +TOTAL : 3.639722 sec INFO: No Floating Point Exceptions have been reported - 10,688,107,015 cycles # 2.990 GHz - 24,011,117,449 instructions # 2.25 insn per cycle - 3.576054169 seconds time elapsed + 10,720,308,622 cycles # 2.941 GHz + 24,027,850,859 instructions # 2.24 insn per cycle + 3.646936507 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.603793e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.926174e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.926174e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.622097e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.949479e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.949479e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.384762 sec +TOTAL : 2.385273 sec INFO: No Floating Point Exceptions have been reported - 6,599,795,120 cycles # 2.762 GHz - 12,349,103,691 instructions # 1.87 insn per cycle - 2.390301945 seconds time elapsed + 6,607,425,584 cycles # 2.762 GHz + 12,368,604,074 instructions # 1.87 insn per cycle + 2.392729796 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3103) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
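-------------------------------------------------------------------------
Note: the "OK (relative difference <= 5E-3)" verdicts printed by the cmpExe steps above are a plain tolerance check on the two average matrix elements. A minimal standalone C++ sketch of the same arithmetic (not the repository's actual cmpExe implementation; the two input values are copied from the sse4_d_inl1_hrd0 comparison above):

    #include <cmath>
    #include <cstdio>

    int main()
    {
      const double meCpp = 2.028807e+00;       // Avg ME (C++/C++) as printed above
      const double meF77 = 2.0288063388515654; // Avg ME (F77/C++) as printed above
      const double relDiff = std::fabs( meCpp - meF77 ) / std::fabs( meF77 );
      const double tolerance = 5E-3;
      std::printf( "Relative difference = %.16e\n", relDiff ); // ~3.2588e-07
      std::printf( relDiff <= tolerance ? "OK (relative difference <= 5E-3)\n"
                                        : "ERROR (relative difference > 5E-3)\n" );
      return relDiff <= tolerance ? 0 : 1;
    }
-------------------------------------------------------------------------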
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.982939e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.357856e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.357856e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.850394e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.302393e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.302393e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.211076 sec +TOTAL : 2.279711 sec INFO: No Floating Point Exceptions have been reported - 6,160,427,268 cycles # 2.780 GHz - 11,570,234,949 instructions # 1.88 insn per cycle - 2.216581669 seconds time elapsed + 6,291,615,830 cycles # 2.752 GHz + 11,595,311,145 instructions # 1.84 insn per cycle + 2.287442889 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2648) (512y: 239) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.869317e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.091369e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.091369e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.743982e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.952525e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.952525e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.815766 sec +TOTAL : 2.921108 sec INFO: No Floating Point Exceptions have been reported - 5,389,719,668 cycles # 1.911 GHz - 9,283,534,251 instructions # 1.72 insn per cycle - 2.821353617 seconds time elapsed + 5,423,773,794 cycles # 1.852 GHz + 9,310,782,229 instructions # 1.72 insn per cycle + 2.929084188 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2096) (512y: 282) (512z: 1955) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
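-------------------------------------------------------------------------
Note: the derived figures quoted next to the perf counters above follow directly from the raw counts. For instance, for the 512z_d_inl1_hrd0 run just shown, a short C++ check reproduces the "1.852 GHz" and "1.72 insn per cycle" annotations:

    #include <cstdio>

    int main()
    {
      // Raw counters copied from the 512z_d_inl1_hrd0 block above
      const double cycles = 5423773794.0;
      const double instructions = 9310782229.0;
      const double elapsedSec = 2.929084188;
      std::printf( "%.3f GHz\n", cycles / elapsedSec / 1e9 );        // 1.852 GHz
      std::printf( "%.2f insn per cycle\n", instructions / cycles ); // 1.72
      return 0;
    }
-------------------------------------------------------------------------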
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt index 4598d7e2e7..86df224c90 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_d_inl1_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-01_23:34:12 +DATE: 2024-09-15_11:35:55 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.473262e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.369697e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.001580e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.255241e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.245059e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.949681e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.525400 sec +TOTAL : 0.533151 sec INFO: No Floating Point Exceptions have been reported - 2,217,363,075 cycles # 2.923 GHz - 3,200,444,976 instructions # 1.44 insn per cycle - 0.815931801 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 + 2,204,451,822 cycles # 2.864 GHz + 3,124,206,943 instructions # 1.42 insn per cycle + 0.826948420 seconds 
time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 Avg ME (F77/GPU) = 2.0288063388516822 Relative difference = 3.2588034143755247e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl1_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.621117e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.716737e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.716737e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.566402e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.659710e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.659710e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 4.105569 sec +TOTAL : 4.199423 sec INFO: No Floating Point Exceptions have been reported - 12,322,987,130 cycles # 2.998 GHz - 34,913,845,215 instructions # 
2.83 insn per cycle - 4.110930955 seconds time elapsed + 12,339,181,649 cycles # 2.934 GHz + 34,922,451,175 instructions # 2.83 insn per cycle + 4.206438418 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 430) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515649 Relative difference = 3.258803992249869e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.044826e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.184646e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.184646e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.977481e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.113067e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.113067e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.549215 sec +TOTAL : 3.637534 sec INFO: No Floating Point Exceptions have been reported - 10,667,286,256 cycles # 3.002 GHz - 23,012,274,265 instructions # 2.16 insn per cycle - 3.554585973 seconds time elapsed + 10,710,920,290 cycles # 2.939 GHz + 23,032,620,692 instructions # 2.15 insn per cycle + 3.644897421 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2340) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388515654 Relative difference = 3.2588039900609506e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.959234e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.341805e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.341805e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.909385e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.279214e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.279214e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.219712 sec +TOTAL : 2.252873 sec INFO: No Floating Point Exceptions have been reported - 6,206,346,340 cycles # 2.790 GHz - 11,957,801,780 instructions # 1.93 insn per cycle - 2.225127509 seconds time elapsed + 6,212,002,997 cycles # 2.749 GHz + 11,978,645,016 instructions # 1.93 insn per cycle + 2.260347594 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2491) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
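-------------------------------------------------------------------------
Note: the recurring "INFO: The following Floating Point Exceptions will cause SIGFPE program aborts" lines mean that traps for the three listed exceptions are armed before the run, and the later "No Floating Point Exceptions have been reported" lines confirm none fired. A minimal C++ sketch of how such traps can be enabled (assuming glibc, whose feenableexcept extension provides this; not necessarily how these executables arm them):

    #include <cfenv>  // feenableexcept is a glibc extension (needs _GNU_SOURCE,
                      // which g++ defines by default)
    #include <cstdio>

    int main()
    {
      feenableexcept( FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW );
      std::printf( "Traps armed: a division by zero, invalid operation or\n"
                   "overflow will now raise SIGFPE and abort the program\n" );
      return 0;
    }
-------------------------------------------------------------------------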
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.169842e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.570767e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.570767e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.039032e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.423530e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.423530e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.131713 sec +TOTAL : 2.196002 sec INFO: No Floating Point Exceptions have been reported - 6,032,167,736 cycles # 2.824 GHz - 11,128,490,395 instructions # 1.84 insn per cycle - 2.137129004 seconds time elapsed + 6,062,556,643 cycles # 2.753 GHz + 11,146,456,018 instructions # 1.84 insn per cycle + 2.203425956 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2103) (512y: 174) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.056148e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.296486e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.296486e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.871031e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.091641e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.091641e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.690977 sec +TOTAL : 2.826553 sec INFO: No Floating Point Exceptions have been reported - 5,188,865,221 cycles # 1.925 GHz - 9,022,420,854 instructions # 1.74 insn per cycle - 2.696419516 seconds time elapsed + 5,265,704,314 cycles # 1.859 GHz + 9,046,022,125 instructions # 1.72 insn per cycle + 2.834187629 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1632) (512y: 208) (512z: 1571) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
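-------------------------------------------------------------------------
Note: each throughput run above is launched with "-p 2048 256 2". Assuming these three numbers carry the usual blocks / threads-per-block / iterations meaning (an assumption, not stated in this log), the matrix-element counts behind the EvtsPerSec figures work out as follows:

    #include <cstdio>

    int main()
    {
      const long gpuBlocks = 2048, gpuThreads = 256, iterations = 2;
      std::printf( "MEs per iteration: %ld\n", gpuBlocks * gpuThreads );              // 524288
      std::printf( "MEs per run      : %ld\n", gpuBlocks * gpuThreads * iterations ); // 1048576
      return 0;
    }
-------------------------------------------------------------------------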
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063388516204 Relative difference = 3.2588037186351226e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt index 43668d5a11..e1d11759a7 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-01_23:14:50 +DATE: 2024-09-15_11:11:52 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.016549e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.793906e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.905869e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.179768e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.708203e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.827426e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.487274 sec +TOTAL : 0.491224 sec INFO: No Floating Point Exceptions have been reported - 1,997,739,441 cycles # 2.811 GHz - 2,897,236,768 instructions # 1.45 insn per cycle - 0.769495399 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 + 2,056,714,115 cycles # 2.865 GHz + 2,916,773,309 instructions # 1.42 insn per cycle + 0.776029796 seconds time 
elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028811e+00 Avg ME (F77/GPU) = 2.0288499356247485 Relative difference = 1.9191351362116207e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.882513e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.935230e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.935230e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.918838e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.972832e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.972832e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.656519 sec +TOTAL : 5.552302 sec INFO: No Floating Point Exceptions have been reported - 16,240,144,549 cycles # 2.870 GHz - 45,333,715,938 instructions # 2.79 insn 
per cycle - 5.661567866 seconds time elapsed + 16,247,282,670 cycles # 2.924 GHz + 45,328,928,537 instructions # 2.79 insn per cycle + 5.557963082 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288198669441044 Relative difference = 6.558289825352968e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.418761e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.747409e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.747409e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.529514e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.866293e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.866293e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.459987 sec +TOTAL : 2.402922 sec INFO: No Floating Point Exceptions have been reported - 7,053,646,042 cycles # 2.862 GHz - 17,771,434,741 instructions # 2.52 insn per cycle - 2.465198154 seconds time elapsed + 7,055,912,070 cycles # 2.931 GHz + 17,768,218,222 instructions # 2.52 insn per cycle + 2.408607319 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193075684831 Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.065007e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.131343e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.131343e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.299822e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.410195e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.410195e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.386858 sec +TOTAL : 1.350007 sec INFO: No Floating Point Exceptions have been reported - 3,742,121,111 cycles # 2.690 GHz - 8,264,654,394 instructions # 2.21 insn per cycle - 1.392057287 seconds time elapsed + 3,747,874,250 cycles # 2.767 GHz + 8,260,976,747 instructions # 2.20 insn per cycle + 1.355686963 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3364) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.712893e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.953154e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.953154e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.794382e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.005480e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.005480e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.290169 sec +TOTAL : 1.278305 sec INFO: No Floating Point Exceptions have been reported - 3,547,204,215 cycles # 2.739 GHz - 7,917,181,494 instructions # 2.23 insn per cycle - 1.295874973 seconds time elapsed + 3,550,706,297 cycles # 2.767 GHz + 7,915,681,558 instructions # 2.23 insn per cycle + 1.284036639 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3208) (512y: 20) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.314722e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.941966e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.941966e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.489307e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.134354e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.134354e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.748569 sec +TOTAL : 1.705925 sec INFO: No Floating Point Exceptions have been reported - 3,258,838,954 cycles # 1.859 GHz - 6,097,393,995 instructions # 1.87 insn per cycle - 1.753998859 seconds time elapsed + 3,272,576,419 cycles # 1.913 GHz + 6,103,138,487 instructions # 1.86 insn per cycle + 1.712010321 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2249) (512y: 24) (512z: 2155) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
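-------------------------------------------------------------------------
Note: the FLOAT logs above report twice the SIMD vector width of the corresponding DOUBLE logs (e.g. VECTOR[16] vs VECTOR[8] for the 512-bit '512z' backend, VECTOR[8] vs VECTOR[4] for avx2). The lane counts are simply the register width divided by the element size, as this short C++ check illustrates:

    #include <cstdio>

    int main()
    {
      const int widths[] = { 128, 256, 512 }; // sse4 / avx2 and 512y / 512z
      for( int bits : widths )
        std::printf( "%3d-bit: %2d float lanes, %d double lanes\n", bits,
                     bits / int( 8 * sizeof( float ) ),
                     bits / int( 8 * sizeof( double ) ) );
      return 0;
    }
-------------------------------------------------------------------------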
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183148950338 Relative difference = 1.5521108056421764e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt index af35127683..4785fec175 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_bridge.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,40 +11,40 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-01_23:43:09 +DATE: 2024-09-15_11:46:39 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -53,17 +53,17 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.996475e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.103004e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.103004e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.985126e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.401873e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.401873e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 -TOTAL : 0.679008 sec +TOTAL : 0.682896 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 2,647,282,815 cycles # 2.916 GHz - 4,083,441,957 instructions # 1.54 insn per cycle - 0.966781318 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge + 2,622,267,762 cycles # 2.874 GHz + 4,057,326,622 instructions # 1.55 insn per cycle + 0.970912293 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublocks*gputhreads=524288) @@ -71,7 +71,7 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo ==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
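The nevt=524288 in the Bridge warnings above is just the product of the grid arguments: with "-p 2048 256 2" the bridge holds gpublocks*gputhreads events per iteration and the check executable loops over 2 iterations. A sketch of that bookkeeping (illustrative only, not the bridge code itself):

  #include <cstdio>

  int main()
  {
    const int gpublocks = 2048, gputhreads = 256, niter = 2;
    const int nevt = gpublocks * gputhreads;   // 524288, as logged
    printf( "nevt=%d, total events=%d\n", nevt, nevt * niter ); // 524288, 1048576
    return 0;
  }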
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -79,35 +79,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028811e+00 Avg ME (F77/GPU) = 2.0288499356247485 Relative difference = 1.9191351362116207e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.961039e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.017138e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.017138e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.900194e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.953379e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.953379e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.474034 sec +TOTAL : 5.681942 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 16,432,761,783 cycles # 2.999 GHz - 45,377,602,500 instructions # 2.76 insn per cycle - 5.480474769 seconds time elapsed + 16,671,667,536 cycles # 2.929 GHz + 45,497,192,820 instructions # 2.73 insn per cycle + 5.692941265 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -115,33 +115,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288198669441044 Relative difference = 6.558289825352968e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.583926e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.923485e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.923485e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.487658e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.829841e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.829841e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.417806 sec +TOTAL : 2.506047 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 7,259,616,163 cycles # 2.996 GHz - 18,052,508,494 instructions # 2.49 insn per cycle - 2.424027513 seconds time elapsed + 7,392,710,973 cycles # 2.938 GHz + 18,162,302,699 instructions # 2.46 insn per cycle + 2.517172183 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -149,33 +149,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193075684831 Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.420996e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.554165e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.554165e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.160433e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.262111e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.262111e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.376482 sec +TOTAL : 1.454756 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 3,944,996,980 cycles # 2.854 GHz - 8,501,442,678 instructions # 2.15 insn per cycle - 1.382794301 seconds time elapsed + 4,077,667,192 cycles # 2.783 GHz + 8,611,395,195 instructions # 2.11 insn per cycle + 1.466232607 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3364) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -183,33 +183,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.851997e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.010518e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.010518e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.644244e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.901065e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.901065e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.316183 sec +TOTAL : 1.382676 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 3,756,474,273 cycles # 2.842 GHz - 8,156,395,844 instructions # 2.17 insn per cycle - 1.322500713 seconds time elapsed + 3,878,507,500 cycles # 2.784 GHz + 8,265,873,907 instructions # 2.13 insn per cycle + 1.393862906 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3208) (512y: 20) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -217,33 +217,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=524288) Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.544940e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.196808e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.196808e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.387022e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.022226e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.022226e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.734787 sec +TOTAL : 1.814510 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 3,470,022,350 cycles # 1.994 GHz - 6,354,238,204 instructions # 1.83 insn per cycle - 1.741207745 seconds time elapsed + 3,594,832,163 cycles # 1.970 GHz + 6,462,220,806 instructions # 1.80 insn per cycle + 1.825958297 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2249) (512y: 24) (512z: 2155) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -251,8 +251,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183148950338 Relative difference = 1.5521108056421764e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt index daf3f60730..ff1a0d1a39 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_common.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-01_23:55:07 +DATE: 2024-09-15_11:58:55 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.105377e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.687892e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.803548e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.402978e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.823028e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.958083e+08 ) sec^-1 MeanMatrixElemValue = ( 2.079446e+00 +- 3.403306e-03 ) GeV^0 -TOTAL : 0.572824 sec +TOTAL : 0.595970 sec INFO: No Floating Point Exceptions have been reported - 2,316,864,231 cycles # 2.921 GHz - 3,396,488,753 instructions # 1.47 insn per cycle - 0.852389057 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common + 2,351,354,548 cycles # 2.876 GHz + 3,444,506,671 instructions # 1.46 insn per cycle + 0.875001238 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
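The EvtsPerSec counters divide the event count by the time spent inside the timed code section only, which is why they sit far above nevt/TOTAL: TOTAL also includes CUDA startup and other host-side work. Inverting the logged MECalcOnly rate of the CUDA run above shows how small the matrix-element share of TOTAL actually is:

  #include <cstdio>

  int main()
  {
    const double nevt = 2048.0 * 256 * 2;        // 1048576 events in this run
    const double dtMEOnly = nevt / 1.958083e+08; // invert the logged (3a) rate
    printf( "ME-only time ~ %.4f s of TOTAL 0.596 s\n", dtMEOnly ); // ~0.0054 s
    return 0;
  }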
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028811e+00 Avg ME (F77/GPU) = 2.0288499356247485 Relative difference = 1.9191351362116207e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.963961e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.019337e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.019337e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.915967e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.970379e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.970379e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079573e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 5.478025 sec +TOTAL : 5.649395 sec INFO: No Floating Point Exceptions have been reported - 16,411,823,234 cycles # 2.994 GHz - 45,361,511,621 instructions # 2.76 insn per cycle - 5.483296816 seconds time elapsed + 16,580,294,518 cycles # 2.931 GHz + 45,471,304,380 instructions # 2.74 insn per cycle + 5.657307639 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
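The channel split in the MEK debug lines that follow, { 1 : 192, 2 : 160, 3 : 160 }, is consistent with the 512 test events being dealt out in pages of 32 events, round-robin over the 3 channels (16 pages split 6/5/5). The page size of 32 is an assumption here, not something these logs state:

  #include <cstdio>

  int main()
  {
    const int nevt = 512, pageSize = 32, nchan = 3; // pageSize is assumed
    int count[3] = { 0, 0, 0 };
    for( int page = 0; page < nevt / pageSize; page++ )
      count[page % nchan] += pageSize;              // cycle channels 1,2,3
    printf( "{ 1 : %d, 2 : %d, 3 : %d }\n", count[0], count[1], count[2] );
    return 0;
  }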
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288198669441044 Relative difference = 6.558289825352968e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.600554e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.940535e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.940535e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.539990e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.880360e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.880360e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079572e+00 +- 3.404712e-03 ) GeV^0 -TOTAL : 2.418365 sec +TOTAL : 2.485589 sec INFO: No Floating Point Exceptions have been reported - 7,218,096,486 cycles # 2.979 GHz - 17,782,873,843 instructions # 2.46 insn per cycle - 2.423530812 seconds time elapsed + 7,332,359,110 cycles # 2.941 GHz + 17,888,678,821 instructions # 2.44 insn per cycle + 2.493593708 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
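The VECTOR[N] tag in the "Internal loops fptype_sv" lines follows directly from the register width: in FLOAT precision each SIMD lane is 32 bits, so 128-bit SSE4.2 gives VECTOR[4], the 256-bit AVX2 and 512y builds give VECTOR[8], and the 512-bit 512z build gives VECTOR[16]. The same arithmetic as code:

  #include <cstdio>

  int main()
  {
    const int fptypeBits = 32;                   // FP precision = FLOAT
    const int widths[] = { 128, 256, 256, 512 }; // sse4, avx2, 512y, 512z
    const char* tags[] = { "sse4", "avx2", "512y", "512z" };
    for( int i = 0; i < 4; i++ )
      printf( "%s: VECTOR[%d]\n", tags[i], widths[i] / fptypeBits ); // 4,8,8,16
    return 0;
  }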
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193075684831 Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.543812e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.711869e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.711869e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.267444e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.414148e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.414148e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.365436 sec +TOTAL : 1.445011 sec INFO: No Floating Point Exceptions have been reported - 3,906,564,487 cycles # 2.852 GHz - 8,246,033,428 instructions # 2.11 insn per cycle - 1.370596175 seconds time elapsed + 4,026,136,405 cycles # 2.773 GHz + 8,355,233,205 instructions # 2.08 insn per cycle + 1.452615562 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3364) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
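The "=Symbols in CPPProcess_cpp.o=" lines are a census of SIMD instructions found in the disassembled object file, which is how the logs verify that each backend really emitted the expected instruction set (e.g. avx2: 3364 and no 512z symbols for the avx2 build above). A hypothetical filter in the same spirit, counting register classes in objdump output piped on stdin; the repo's own tooling and its exact categories are not reproduced here:

  #include <cstdio>
  #include <iostream>
  #include <string>

  int main()
  {
    // Usage sketch: objdump -d CPPProcess_cpp.o | ./census
    long xmm = 0, ymm = 0, zmm = 0;
    std::string line;
    while( std::getline( std::cin, line ) )
    {
      if( line.find( "%zmm" ) != std::string::npos ) zmm++;      // 512-bit
      else if( line.find( "%ymm" ) != std::string::npos ) ymm++; // 256-bit
      else if( line.find( "%xmm" ) != std::string::npos ) xmm++; // 128-bit
    }
    printf( "xmm: %ld, ymm: %ld, zmm: %ld\n", xmm, ymm, zmm );
    return 0;
  }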
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.738190e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.991307e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.991307e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.729960e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.002290e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.002290e+06 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404207e-03 ) GeV^0 -TOTAL : 1.342405 sec +TOTAL : 1.378766 sec INFO: No Floating Point Exceptions have been reported - 3,715,161,732 cycles # 2.758 GHz - 7,867,702,148 instructions # 2.12 insn per cycle - 1.347728919 seconds time elapsed + 3,842,420,700 cycles # 2.773 GHz + 7,976,068,469 instructions # 2.08 insn per cycle + 1.386498519 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3208) (512y: 20) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.657423e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.354178e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.354178e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.464492e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.118413e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.118413e+05 ) sec^-1 MeanMatrixElemValue = ( 2.079550e+00 +- 3.404208e-03 ) GeV^0 -TOTAL : 1.720331 sec +TOTAL : 1.801449 sec INFO: No Floating Point Exceptions have been reported - 3,434,778,202 cycles # 1.991 GHz - 6,046,919,674 instructions # 1.76 insn per cycle - 1.725698557 seconds time elapsed + 3,544,493,993 cycles # 1.960 GHz + 6,155,712,678 instructions # 1.74 insn per cycle + 1.809068044 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2249) (512y: 24) (512z: 2155) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183148950338 Relative difference = 1.5521108056421764e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt index a26ce13557..d6446e7404 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_curhst.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-01_23:52:19 +DATE: 2024-09-15_11:56:02 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.300334e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.753955e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.878936e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.563506e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.776392e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.890773e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.515714 sec +TOTAL : 0.519623 sec INFO: No Floating Point Exceptions have been reported - 2,131,743,033 cycles # 2.892 GHz - 3,363,258,452 instructions # 1.58 insn per cycle - 0.793874595 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst + 2,142,971,519 cycles # 2.865 GHz + 3,298,601,891 instructions # 1.54 insn per cycle 
+ 0.804804703 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028811e+00 Avg ME (F77/GPU) = 2.0288499356247485 Relative difference = 1.9191351362116207e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.955916e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.010652e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.010652e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.921389e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.976206e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.976206e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.445137 sec +TOTAL : 5.559284 sec INFO: No Floating Point Exceptions have been reported - 16,242,922,108 cycles 
# 2.981 GHz - 45,332,069,683 instructions # 2.79 insn per cycle - 5.450297847 seconds time elapsed + 16,314,564,993 cycles # 2.932 GHz + 45,379,598,894 instructions # 2.78 insn per cycle + 5.566847007 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288198669441044 Relative difference = 6.558289825352968e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.593016e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.934269e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.934269e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.507753e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.846242e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.846242e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.367261 sec +TOTAL : 2.429783 sec INFO: No Floating Point Exceptions have been reported - 7,054,204,460 cycles # 2.974 GHz - 17,770,139,491 instructions # 2.52 insn per cycle - 2.372459853 seconds time elapsed + 7,122,810,590 cycles # 2.924 GHz + 17,820,548,090 instructions # 2.50 insn per cycle + 2.437485624 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193075684831 Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.502328e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.661035e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.661035e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.248249e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.390772e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.390772e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.317120 sec +TOTAL : 1.373977 sec INFO: No Floating Point Exceptions have been reported - 3,741,592,813 cycles # 2.831 GHz - 8,261,772,173 instructions # 2.21 insn per cycle - 1.322339821 seconds time elapsed + 3,800,696,374 cycles # 2.752 GHz + 8,311,474,483 instructions # 2.19 insn per cycle + 1.381547073 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3364) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ 
PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.964572e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.026851e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.026851e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.515740e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.758099e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.758099e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.253784 sec +TOTAL : 1.334783 sec INFO: No Floating Point Exceptions have been reported - 3,555,905,791 cycles # 2.826 GHz - 7,915,891,992 instructions # 2.23 insn per cycle - 1.259045889 seconds time elapsed + 3,623,427,363 cycles # 2.701 GHz + 7,966,643,374 instructions # 2.20 insn per cycle + 1.342318524 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3208) (512y: 20) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
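The cmpExe steps above compare the average matrix element of each C++/CUDA build against the Fortran (F77) reference and accept the build when the relative difference stays below 5E-3. A minimal sketch of that check, assuming the relative difference is normalised by the F77 value (this assumption reproduces the logged 9.214951531400725e-08 for the avx2 build; the actual cmpExe internals may differ):

# Minimal sketch of the cmpExe tolerance check; normalising by the F77
# value is an assumption, not necessarily what cmpExe itself does.
def rel_diff(me_cpp, me_f77):
    return abs(me_cpp - me_f77) / abs(me_f77)

me_cpp = 2.028818e+00        # Avg ME (C++/C++), avx2 build above
me_f77 = 2.0288181869545951  # Avg ME (F77/C++), avx2 build above
d = rel_diff(me_cpp, me_f77) # ~9.214951531400725e-08, as logged
print("OK" if d <= 5e-3 else "FAILED", f"(relative difference = {d:.16e})")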
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.676750e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.352514e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.352514e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.461308e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.125295e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.125295e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.655409 sec +TOTAL : 1.726295 sec INFO: No Floating Point Exceptions have been reported - 3,257,600,602 cycles # 1.963 GHz - 6,097,719,225 instructions # 1.87 insn per cycle - 1.660515865 seconds time elapsed + 3,316,188,727 cycles # 1.914 GHz + 6,146,469,798 instructions # 1.85 insn per cycle + 1.733694711 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2249) (512y: 24) (512z: 2155) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
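The cycles, instructions and derived "# ... GHz" / "# ... insn per cycle" columns in each block are standard perf-stat counters and simple ratios of them. A rough sketch of the arithmetic using the 512z figures above; note that perf normalises cycles by task clock, so dividing by the elapsed wall time only approximately reproduces the logged GHz value:

# Rough sketch of the derived perf-stat columns for the 512z block above;
# perf divides cycles by task clock, so cycles/elapsed only approximates
# the logged GHz figure.
cycles = 3_316_188_727
instructions = 6_146_469_798
elapsed = 1.733694711  # seconds time elapsed

print(f"{instructions / cycles:.2f} insn per cycle")  # ~1.85, as logged
print(f"{cycles / elapsed / 1e9:.3f} GHz (approx)")   # ~1.913 vs logged 1.914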
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183148950338 Relative difference = 1.5521108056421764e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt index cd228c7289..a7d3a3bcad 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd0_rmbhst.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,60 +11,60 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-01_23:49:35 +DATE: 2024-09-15_11:53:15 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 --rmbhst OMP= WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.610160e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.729460e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.851466e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.603732e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.769962e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.882903e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086805e+00 +- 3.414078e-03 ) GeV^0 -TOTAL : 0.621610 sec +TOTAL : 0.632677 sec INFO: No Floating Point Exceptions have been reported - 2,495,868,152 cycles # 2.929 GHz - 3,886,369,127 instructions # 1.56 insn per cycle - 0.908856599 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst + 2,459,588,784 cycles # 2.873 GHz + 3,830,411,115 instructions # 1.56 insn per cycle + 0.912203024 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
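The "-p 2048 256 2" arguments above request 2048 GPU blocks (or C++ event pages) of 256 events each, repeated for 2 iterations, and the EvtsPerSec counters divide that event count by internal per-section timers rather than by the TOTAL wall time. A hedged sketch of the bookkeeping; the timer breakdown is illustrative, not the actual check_cuda.exe code:

# Hedged sketch of the event bookkeeping behind "-p 2048 256 2"; the
# per-section timing is illustrative, not the actual check_cuda.exe code.
blocks, threads, iterations = 2048, 256, 2
nevents = blocks * threads * iterations  # 1,048,576 events in total

evts_per_sec_me = 1.769962e+08           # EvtsPerSec[MatrixElems] above
me_time = nevents / evts_per_sec_me      # ~5.9 ms in sigmaKin alone
print(nevents, f"{me_time * 1e3:.1f} ms")
# Far below TOTAL (0.632677 s), which also covers host-side random
# numbers (CurandHost) and phase-space sampling (RamboHost).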
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -72,33 +72,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028811e+00 Avg ME (F77/GPU) = 2.0288499356247485 Relative difference = 1.9191351362116207e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.971253e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.026799e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.026799e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.920423e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.975843e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.975843e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.403033 sec +TOTAL : 5.564298 sec INFO: No Floating Point Exceptions have been reported - 16,249,477,757 cycles # 3.005 GHz - 45,335,346,673 instructions # 2.79 insn per cycle - 5.408147099 seconds time elapsed + 16,322,815,770 cycles # 2.930 GHz + 45,379,862,622 instructions # 2.78 insn per cycle + 5.571986399 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 591) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
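The MEK debug lines above record how the 512 test events are partitioned across the 3 channels when a channelid array is used; the per-channel counts must sum back to the total. A trivial consistency sketch:

# The channelid-array test partitions 512 events across 3 channels; the
# per-channel counts logged above must sum back to the total.
channels = {1: 192, 2: 160, 3: 160}
assert sum(channels.values()) == 512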
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -106,31 +106,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288198669441044 Relative difference = 6.558289825352968e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.620852e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.968419e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.968419e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.531426e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.870727e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.870727e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.355141 sec +TOTAL : 2.417527 sec INFO: No Floating Point Exceptions have been reported - 7,053,446,897 cycles # 2.989 GHz - 17,770,125,835 instructions # 2.52 insn per cycle - 2.360367399 seconds time elapsed + 7,119,927,533 cycles # 2.937 GHz + 17,819,194,741 instructions # 2.50 insn per cycle + 2.425172051 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3133) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
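The "Internal loops fptype_sv = VECTOR[N]" lines follow from the SIMD register width divided by the lane size: these are FLOAT ("_f_") builds, so each lane is 32 bits, giving VECTOR[4] for SSE4.2 (128-bit), VECTOR[8] for AVX2 and for '512y' (AVX512 instructions on 256-bit registers), and VECTOR[16] for '512z' (full 512-bit AVX512). A one-line sketch of that arithmetic:

# fptype_sv lane count per backend: register bits / 32 bits per float.
for tag, bits in [("sse4", 128), ("avx2", 256), ("512y", 256), ("512z", 512)]:
    print(tag, "-> VECTOR[%d]" % (bits // 32))  # 4, 8, 8, 16 as logged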
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -138,31 +138,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193075684831 Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.504482e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.644248e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.644248e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.302342e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.447707e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.447707e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.317218 sec +TOTAL : 1.366211 sec INFO: No Floating Point Exceptions have been reported - 3,740,370,277 cycles # 2.830 GHz - 8,261,533,304 instructions # 2.21 insn per cycle - 1.322470933 seconds time elapsed + 3,809,953,710 cycles # 2.774 GHz + 8,311,255,796 instructions # 2.18 insn per cycle + 1.374030854 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3364) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -170,31 +170,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.002178e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.030213e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.030213e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.793036e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.010397e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.010397e+06 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.248992 sec +TOTAL : 1.299199 sec INFO: No Floating Point Exceptions have been reported - 3,542,544,565 cycles # 2.826 GHz - 7,916,798,793 instructions # 2.23 insn per cycle - 1.254257229 seconds time elapsed + 3,629,527,941 cycles # 2.777 GHz + 7,964,563,950 instructions # 2.19 insn per cycle + 1.308355970 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3208) (512y: 20) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -202,31 +202,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.739256e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.427515e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.427515e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.490800e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.161305e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.161305e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.640887 sec +TOTAL : 1.720915 sec INFO: No Floating Point Exceptions have been reported - 3,256,631,314 cycles # 1.979 GHz - 6,096,520,142 instructions # 1.87 insn per cycle - 1.646153425 seconds time elapsed + 3,334,705,650 cycles # 1.929 GHz + 6,144,839,228 instructions # 1.84 insn per cycle + 1.729518385 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2249) (512y: 24) (512z: 2155) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
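Comparing the EvtsPerSec[MatrixElems] lines across the backends of this rmbhst log gives the effective SIMD speedup of each build over the scalar 'none' baseline; 512z comes out slower than 512y here, consistent with the lower measured clock in the perf lines above (~1.93 vs ~2.78 GHz). A quick sketch using the logged rates:

# ME-throughput speedup of each SIMD backend over the scalar 'none'
# build, using the EvtsPerSec[MatrixElems] values logged above.
rates = {"none": 1.975843e5, "sse4": 4.870727e5, "avx2": 9.447707e5,
         "512y": 1.010397e6, "512z": 7.161305e5}
for tag, rate in rates.items():
    print(f"{tag}: x{rate / rates['none']:.2f}")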
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -234,8 +234,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183148950338 Relative difference = 1.5521108056421764e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt index ed08ab638a..1a7a19dcfe 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-01_23:15:11 +DATE: 2024-09-15_11:12:13 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.789940e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.792429e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.906090e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.207213e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.745203e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.857795e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.489009 sec +TOTAL : 0.487469 sec INFO: No Floating Point Exceptions have been reported - 2,018,318,532 cycles # 2.827 GHz - 2,895,980,289 instructions # 1.43 insn per cycle - 0.772992983 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 + 2,053,225,612 cycles # 2.869 GHz + 2,963,841,312 instructions # 1.44 insn per cycle + 0.771871582 seconds time 
elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 126 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028811e+00 Avg ME (F77/GPU) = 2.0288499356247485 Relative difference = 1.9191351362116207e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.940323e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.995439e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.995439e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.964254e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.021824e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.021824e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 5.488792 sec +TOTAL : 5.438732 sec INFO: No Floating Point Exceptions have been reported - 15,940,533,363 cycles # 2.902 GHz - 44,444,792,927 instructions # 2.79 insn 
per cycle - 5.494044330 seconds time elapsed + 16,004,474,289 cycles # 2.939 GHz + 44,480,990,455 instructions # 2.78 insn per cycle + 5.447104045 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 536) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288198669441044 Relative difference = 6.558289825352968e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.239530e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.705587e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.705587e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.270854e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.739248e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.739248e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.087450 sec +TOTAL : 2.093047 sec INFO: No Floating Point Exceptions have been reported - 6,069,051,730 cycles # 2.901 GHz - 17,076,951,521 instructions # 2.81 insn per cycle - 2.092819349 seconds time elapsed + 6,146,902,561 cycles # 2.927 GHz + 17,124,330,277 instructions # 2.79 insn per cycle + 2.101358753 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2864) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193075684831 Relative difference = 1.515997647531052e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.981279e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.548807e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.548807e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.010634e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.590643e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.590643e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.838037 sec +TOTAL : 1.845256 sec INFO: No Floating Point Exceptions have been reported - 5,020,094,590 cycles # 2.724 GHz - 10,226,757,184 instructions # 2.04 insn per cycle - 1.843419915 seconds time elapsed + 5,092,053,198 cycles # 2.749 GHz + 10,266,716,383 instructions # 2.02 insn per cycle + 1.853336006 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3907) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.249511e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.851451e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.851451e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.078233e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.679535e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.679535e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.760706 sec +TOTAL : 1.828978 sec INFO: No Floating Point Exceptions have been reported - 4,966,480,857 cycles # 2.814 GHz - 9,996,514,844 instructions # 2.01 insn per cycle - 1.765809387 seconds time elapsed + 5,049,904,876 cycles # 2.749 GHz + 10,046,122,437 instructions # 1.99 insn per cycle + 1.837563375 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3806) (512y: 2) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181869545951 Relative difference = 9.214951531400725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.805571e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.146052e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.146052e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.649805e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.982891e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.982891e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 2.264482 sec +TOTAL : 2.359308 sec INFO: No Floating Point Exceptions have been reported - 4,363,518,057 cycles # 1.923 GHz - 8,444,795,066 instructions # 1.94 insn per cycle - 2.269675299 seconds time elapsed + 4,441,902,341 cycles # 1.877 GHz + 8,494,262,942 instructions # 1.91 insn per cycle + 2.367884804 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2746) (512y: 4) (512z: 2754) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
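For scale: with "-p 2048 256 2" each check executable processes 2048 blocks x 256 threads x 2 iterations = 1,048,576 events per run (assuming the usual blocks/threads/iterations meaning of the -p arguments). The EvtsPerSec[MatrixElems] figure then implies the time spent in the ME computation alone, which is below the reported TOTAL since the latter also covers random numbers and phase space sampling. A quick check against the 512z_f_inl0_hrd1 run above:

    blocks, threads, iterations = 2048, 256, 2
    nevt = blocks * threads * iterations        # 1,048,576 events
    me_rate = 4.982891e+05                      # EvtsPerSec[MatrixElems] above
    print(f"ME time ~ {nevt / me_rate:.2f} s")  # ~2.10 s of the 2.36 s TOTAL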
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183148950338 Relative difference = 1.5521108056421764e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt index d4756ce596..7bcb20b104 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-01_23:34:35 +DATE: 2024-09-15_11:36:18 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.603374e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.765581e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.882221e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.281805e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.729157e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.845967e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.485644 sec +TOTAL : 0.487987 sec INFO: No Floating Point Exceptions have been reported - 2,066,870,474 cycles # 2.910 GHz - 3,000,798,855 instructions # 1.45 insn per cycle - 0.768445011 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 + 2,055,281,728 cycles # 2.872 GHz + 2,956,266,509 instructions # 1.44 insn per cycle + 0.772297546 seconds time 
elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028811e+00 Avg ME (F77/GPU) = 2.0288499356247485 Relative difference = 1.9191351362116207e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.546441e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.640080e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.640080e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.496211e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.588163e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.588163e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.201218 sec +TOTAL : 4.287732 sec INFO: No Floating Point Exceptions have been reported - 12,581,623,324 cycles # 2.992 GHz - 34,609,638,297 instructions # 2.75 insn 
per cycle - 4.206417902 seconds time elapsed + 12,584,199,997 cycles # 2.932 GHz + 34,606,962,286 instructions # 2.75 insn per cycle + 4.293417398 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 683) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288199094356969 Relative difference = 4.463890496342449e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.413724e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.896525e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.896525e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.317872e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.783743e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.783743e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.021769 sec +TOTAL : 2.058410 sec INFO: No Floating Point Exceptions have been reported - 6,062,874,785 cycles # 2.992 GHz - 14,848,758,253 instructions # 2.45 insn per cycle - 2.026822059 seconds time elapsed + 6,058,288,486 cycles # 2.936 GHz + 14,847,536,122 instructions # 2.45 insn per cycle + 2.064093895 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2980) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193414453417 Relative difference = 1.6829758681196702e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.309646e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.143470e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.143470e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.131335e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.950373e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.950373e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.516599 sec +TOTAL : 1.556955 sec INFO: No Floating Point Exceptions have been reported - 4,292,704,034 cycles # 2.823 GHz - 9,054,713,499 instructions # 2.11 insn per cycle - 1.521729173 seconds time elapsed + 4,316,973,163 cycles # 2.764 GHz + 9,053,302,579 instructions # 2.10 insn per cycle + 1.562583378 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4460) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
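The cycles/instructions lines above are perf-stat style counters, and the derived columns follow from simple ratios (approximately, since perf normalises by task-clock rather than the wall-clock elapsed time used here). Reproducing them from the avx2_f_inl1_hrd0 numbers above:

    cycles       = 4_316_973_163
    instructions = 9_053_302_579
    elapsed_s    = 1.562583378
    print(f"{cycles / elapsed_s / 1e9:.3f} GHz")          # ~2.76 vs 2.764 reported
    print(f"{instructions / cycles:.2f} insn per cycle")  # 2.10, as reported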
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181974319741 Relative difference = 9.731379272303266e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.509425e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.388317e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.388317e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.308917e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.178890e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.178890e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.478807 sec +TOTAL : 1.521257 sec INFO: No Floating Point Exceptions have been reported - 4,198,253,929 cycles # 2.830 GHz - 8,663,551,193 instructions # 2.06 insn per cycle - 1.483957255 seconds time elapsed + 4,205,210,775 cycles # 2.756 GHz + 8,662,511,141 instructions # 2.06 insn per cycle + 1.526851661 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4225) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288181974319741 Relative difference = 9.731379272303266e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.598209e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.065067e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.065067e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.372555e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.809988e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.809988e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.957003 sec +TOTAL : 2.038768 sec INFO: No Floating Point Exceptions have been reported - 3,829,608,493 cycles # 1.953 GHz - 7,806,553,120 instructions # 2.04 insn per cycle - 1.962132869 seconds time elapsed + 3,837,026,814 cycles # 1.878 GHz + 7,805,330,859 instructions # 2.03 insn per cycle + 2.044464874 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4273) (512y: 0) (512z: 2558) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
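Taking the f_inl1_hrd0 runs above together, the measured SIMD speedups over the scalar 'none' build fall well short of the nominal vector widths, and 512z loses further ground because the core clocks down to ~1.88 GHz under 512-bit AVX-512 (vs ~2.76 GHz for avx2). A quick tabulation from the EvtsPerSec[MatrixElems] figures above:

    scalar = 2.588163e5  # 'none' build above
    for tag, width, rate in [("sse4", 4, 5.783743e5), ("avx2", 8, 7.950373e5),
                             ("512y", 8, 8.178890e5), ("512z", 16, 5.809988e5)]:
        print(f"{tag}: x{width} nominal, x{rate / scalar:.2f} measured")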
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183246739209 Relative difference = 1.6003107281264138e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt index ae30e9c24e..69afb6ef9f 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_f_inl1_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-01_23:34:55 +DATE: 2024-09-15_11:36:38 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.609521e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.765362e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.882743e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.211144e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.722563e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.857000e+08 ) sec^-1 MeanMatrixElemValue = ( 2.086719e+00 +- 3.413389e-03 ) GeV^0 -TOTAL : 0.484090 sec +TOTAL : 0.487617 sec INFO: No Floating Point Exceptions have been reported - 2,069,775,597 cycles # 2.915 GHz - 2,991,357,430 instructions # 1.45 insn per cycle - 0.767321876 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 + 2,060,698,152 cycles # 2.879 GHz + 2,912,783,409 instructions # 1.41 insn per cycle + 0.772686787 seconds time 
elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 126 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028811e+00 Avg ME (F77/GPU) = 2.0288499356247485 Relative difference = 1.9191351362116207e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl1_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.594738e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.692575e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.692575e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.666171e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.771083e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.771083e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086780e+00 +- 3.413794e-03 ) GeV^0 -TOTAL : 4.124478 sec +TOTAL : 4.019199 sec INFO: No Floating Point Exceptions have been reported - 11,853,777,518 cycles # 2.871 GHz - 35,078,486,896 instructions # 2.96 insn 
per cycle - 4.129581902 seconds time elapsed + 11,827,953,010 cycles # 2.940 GHz + 35,076,444,454 instructions # 2.97 insn per cycle + 4.024833106 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 453) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028820e+00 Avg ME (F77/C++) = 2.0288199094356969 Relative difference = 4.463890496342449e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.387597e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.876622e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.876622e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.415928e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.902704e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.902704e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086779e+00 +- 3.413793e-03 ) GeV^0 -TOTAL : 2.032118 sec +TOTAL : 2.023749 sec INFO: No Floating Point Exceptions have been reported - 5,954,220,246 cycles # 2.924 GHz - 14,471,125,804 instructions # 2.43 insn per cycle - 2.037354801 seconds time elapsed + 5,953,573,213 cycles # 2.935 GHz + 14,468,346,196 instructions # 2.43 insn per cycle + 2.029398775 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2559) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028819e+00 Avg ME (F77/C++) = 2.0288193583255634 Relative difference = 1.7661780742548925e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.547735e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.457816e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.457816e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.382613e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.271116e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.271116e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.473387 sec +TOTAL : 1.507601 sec INFO: No Floating Point Exceptions have been reported - 4,172,232,401 cycles # 2.823 GHz - 8,881,553,593 instructions # 2.13 insn per cycle - 1.478367886 seconds time elapsed + 4,170,609,924 cycles # 2.758 GHz + 8,881,070,721 instructions # 2.13 insn per cycle + 1.513291878 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3570) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288182104704902 Relative difference = 1.0374044905426431e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.564022e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.474971e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.474971e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.441367e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.356283e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.356283e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.470326 sec +TOTAL : 1.495607 sec INFO: No Floating Point Exceptions have been reported - 4,127,002,434 cycles # 2.799 GHz - 8,410,002,269 instructions # 2.04 insn per cycle - 1.475340782 seconds time elapsed + 4,129,531,699 cycles # 2.752 GHz + 8,406,651,679 instructions # 2.04 insn per cycle + 1.501280641 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3296) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
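Comparing the none_f_inl1_hrd0 and none_f_inl1_hrd1 runs above isolates the effect of hardcodePARAM on the scalar build at fixed inlineHel=1:

    rate_hrd0 = 2.588163e5  # EvtsPerSec[MatrixElems], none_f_inl1_hrd0 above
    rate_hrd1 = 2.771083e5  # EvtsPerSec[MatrixElems], none_f_inl1_hrd1 above
    print(f"{100 * (rate_hrd1 / rate_hrd0 - 1):.1f}% faster "
          "with hardcoded params")  # ~7.1%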
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288182104704902 Relative difference = 1.0374044905426431e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.698236e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.184991e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.184991e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.439343e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.891063e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.891063e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086810e+00 +- 3.414231e-03 ) GeV^0 -TOTAL : 1.924077 sec +TOTAL : 2.014863 sec INFO: No Floating Point Exceptions have been reported - 3,774,778,097 cycles # 1.958 GHz - 7,699,447,548 instructions # 2.04 insn per cycle - 1.929340998 seconds time elapsed + 3,794,076,081 cycles # 1.879 GHz + 7,699,347,303 instructions # 2.03 insn per cycle + 2.020593600 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3289) (512y: 0) (512z: 2110) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
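The 512z_f_inl1_hrd1 run above retires fewer cycles and fewer instructions than the avx2 one, yet takes longer in wall time: the lower AVX-512 clock dominates. Dividing cycles by the reported clock recovers the elapsed times:

    avx2 = dict(cycles=4_170_609_924, ghz=2.758)  # from the avx2 run above
    z512 = dict(cycles=3_794_076_081, ghz=1.879)  # from the 512z run above
    for tag, r in (("avx2", avx2), ("512z", z512)):
        print(tag, f"{r['cycles'] / (r['ghz'] * 1e9):.2f} s")  # ~1.51 s vs ~2.02 s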
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028818e+00 Avg ME (F77/C++) = 2.0288183204829693 Relative difference = 1.5796536184903122e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt index a3419d3eba..5fcfefd8b1 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-01_23:15:33 +DATE: 2024-09-15_11:12:35 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.656233e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.473409e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.009011e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.360888e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.282446e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.948861e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.524071 sec +TOTAL : 0.535947 sec INFO: No Floating Point Exceptions have been reported - 2,200,106,592 cycles # 2.912 GHz - 3,136,630,811 instructions # 1.43 insn per cycle - 0.813499232 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 + 2,206,968,400 cycles # 2.863 GHz + 3,177,366,447 instructions # 1.44 insn per cycle + 0.829346124 seconds time 
elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 Avg ME (F77/GPU) = 2.0288063423243874 Relative difference = 3.241686432649386e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.842313e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.889136e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.889136e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.801369e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.847904e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.847904e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.797564 sec +TOTAL : 5.967015 sec INFO: No Floating Point Exceptions have been reported - 17,380,525,335 cycles # 2.996 GHz - 46,085,182,385 instructions # 2.65 insn 
per cycle - 5.802980374 seconds time elapsed + 17,524,709,788 cycles # 2.932 GHz + 46,191,860,900 instructions # 2.64 insn per cycle + 5.978935443 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 617) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.264315e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.428282e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.428282e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.140467e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.299172e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.299172e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.317159 sec +TOTAL : 3.487825 sec INFO: No Floating Point Exceptions have been reported - 9,927,217,134 cycles # 2.989 GHz - 27,592,170,928 instructions # 2.78 insn per cycle - 3.322556315 seconds time elapsed + 10,261,674,067 cycles # 2.934 GHz + 27,722,537,189 instructions # 2.70 insn per cycle + 3.498978005 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2582) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.084318e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.477386e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.477386e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.032137e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.431710e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.431710e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.167284 sec +TOTAL : 2.231910 sec INFO: No Floating Point Exceptions have been reported - 6,096,775,135 cycles # 2.807 GHz - 12,483,393,435 instructions # 2.05 insn per cycle - 2.172749966 seconds time elapsed + 6,175,976,175 cycles # 2.753 GHz + 12,601,670,185 instructions # 2.04 insn per cycle + 2.244594220 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2774) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.317310e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.753244e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.753244e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.534616e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.015525e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.015525e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.077970 sec +TOTAL : 2.042072 sec INFO: No Floating Point Exceptions have been reported - 5,527,641,053 cycles # 2.654 GHz - 11,920,275,101 instructions # 2.16 insn per cycle - 2.083291823 seconds time elapsed + 5,669,805,165 cycles # 2.764 GHz + 12,036,562,183 instructions # 2.12 insn per cycle + 2.054181107 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2519) (512y: 146) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.734122e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.938328e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.938328e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.587962e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.783361e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.783361e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.911727 sec +TOTAL : 3.070999 sec INFO: No Floating Point Exceptions have been reported - 5,583,846,989 cycles # 1.915 GHz - 8,109,763,701 instructions # 1.45 insn per cycle - 2.917087683 seconds time elapsed + 5,754,052,644 cycles # 1.867 GHz + 8,225,264,257 instructions # 1.43 insn per cycle + 3.082911381 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1671) (512y: 126) (512z: 1863) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 diff --git a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt index 245c1b9aa1..f3ccad1744 100644 --- a/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggtt_mad/log_ggtt_mad_m_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-01_23:15:57 +DATE: 2024-09-15_11:13:00 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.642987e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.413319e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.000860e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.403841e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.350229e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.960040e+07 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 0.521821 sec +TOTAL : 0.532145 sec INFO: No Floating Point Exceptions have been reported - 2,190,325,490 cycles # 2.907 GHz - 3,184,963,858 instructions # 1.45 insn per cycle - 0.810757695 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 + 2,202,225,096 cycles # 2.860 GHz + 3,140,327,784 instructions # 1.43 insn per cycle + 0.826897706 seconds time 
elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.028807e+00 Avg ME (F77/GPU) = 2.0288063423243874 Relative difference = 3.241686432649386e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.867143e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.915636e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.915636e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.849779e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.898891e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.898891e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 5.722177 sec +TOTAL : 5.812131 sec INFO: No Floating Point Exceptions have been reported - 16,937,691,218 cycles # 2.958 GHz - 45,108,922,900 instructions # 2.66 insn 
per cycle - 5.727585042 seconds time elapsed + 17,080,273,912 cycles # 2.934 GHz + 45,215,696,703 instructions # 2.65 insn per cycle + 5.823642020 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 568) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.410851e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.588061e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.588061e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.353337e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.532203e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.532203e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 3.179584 sec +TOTAL : 3.272970 sec INFO: No Floating Point Exceptions have been reported - 9,489,177,146 cycles # 2.981 GHz - 26,245,079,944 instructions # 2.77 insn per cycle - 3.185047895 seconds time elapsed + 9,622,203,648 cycles # 2.930 GHz + 26,352,115,115 instructions # 2.74 insn per cycle + 3.284795843 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2386) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288063903750300 Relative difference = 3.0048445715164216e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.580524e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.899812e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.899812e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.499939e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.814164e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.814164e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.394558 sec +TOTAL : 2.448542 sec INFO: No Floating Point Exceptions have been reported - 6,736,073,752 cycles # 2.808 GHz - 14,030,605,615 instructions # 2.08 insn per cycle - 2.400088375 seconds time elapsed + 6,760,703,277 cycles # 2.754 GHz + 14,051,302,777 instructions # 2.08 insn per cycle + 2.455916079 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2896) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.807671e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.164109e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.164109e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.725371e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.062901e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.062901e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.285657 sec +TOTAL : 2.336085 sec INFO: No Floating Point Exceptions have been reported - 6,414,671,963 cycles # 2.801 GHz - 13,521,130,801 instructions # 2.11 insn per cycle - 2.290985773 seconds time elapsed + 6,433,871,158 cycles # 2.746 GHz + 13,544,684,713 instructions # 2.11 insn per cycle + 2.343643276 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2535) (512y: 302) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.674213e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.870504e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.870504e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.546154e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.730746e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.730746e+05 ) sec^-1 MeanMatrixElemValue = ( 2.086689e+00 +- 3.413217e-03 ) GeV^0 -TOTAL : 2.959862 sec +TOTAL : 3.074398 sec INFO: No Floating Point Exceptions have been reported - 5,588,271,185 cycles # 1.885 GHz - 9,207,541,120 instructions # 1.65 insn per cycle - 2.965215793 seconds time elapsed + 5,684,367,008 cycles # 1.845 GHz + 9,231,965,840 instructions # 1.62 insn per cycle + 3.081687192 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1456) (512y: 212) (512z: 2060) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.028807e+00 Avg ME (F77/C++) = 2.0288064057068964 Relative difference = 2.9292737240031234e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt index 570203dc1b..0fe4cfc922 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-01_23:16:22 +DATE: 2024-09-15_11:13:25 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.798374e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.983326e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.078543e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.646703e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.903323e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.008440e+06 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.470552 sec +TOTAL : 0.472515 sec INFO: No Floating Point Exceptions have been reported - 1,997,326,811 cycles # 2.908 GHz - 2,877,302,032 instructions # 1.44 insn per cycle - 0.743546221 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 + 1,976,395,629 cycles # 2.864 GHz + 2,853,369,004 instructions # 1.44 insn per 
cycle + 0.746437756 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.058137e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.233632e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.243435e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.044065e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.229313e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.240372e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.606392 sec +TOTAL : 0.613454 sec INFO: No Floating Point Exceptions have been reported - 2,483,568,255 cycles # 2.922 GHz - 3,806,899,478 instructions # 1.53 insn per cycle - 0.908657999 seconds time elapsed + 2,457,306,952 cycles # 2.873 GHz + 3,760,458,763 instructions # 1.53 insn per cycle + 0.914581816 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 Avg ME (F77/GPU) = 1.4131213684418649 Relative difference = 4.469239988637851e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.474741e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.486951e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.486951e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.423163e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.435086e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.435086e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.640929 sec +TOTAL : 6.782254 sec INFO: No Floating Point Exceptions have been reported - 19,899,191,669 cycles # 2.995 GHz - 59,914,426,502 instructions # 3.01 insn per cycle - 6.645138273 seconds time elapsed + 19,933,068,888 cycles # 2.938 GHz + 59,910,639,029 instructions # 3.01 insn per cycle + 6.786428407 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1199) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.659881e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.702510e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.702510e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.574256e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.615519e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.615519e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.535409 sec +TOTAL : 3.601510 sec INFO: No Floating Point Exceptions have been reported - 10,570,379,022 cycles # 2.987 GHz - 31,084,508,228 instructions # 2.94 insn per cycle - 3.539526407 seconds time elapsed + 10,564,249,920 cycles # 2.931 GHz + 31,083,049,027 instructions # 2.94 insn per cycle + 3.605720194 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5221) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.311038e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.477508e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.477508e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.109445e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.271688e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.271688e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.779040 sec +TOTAL : 1.818340 sec INFO: No Floating Point Exceptions have been reported - 4,998,241,265 cycles # 2.804 GHz - 11,405,241,402 instructions # 2.28 insn per cycle - 1.783292570 seconds time elapsed + 4,995,758,651 cycles # 2.742 GHz + 11,404,411,821 instructions # 2.28 insn per cycle + 1.822613950 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4642) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416466 Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.053479e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.074459e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.074459e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.031962e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.052518e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.052518e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.574748 sec +TOTAL : 1.607425 sec INFO: No Floating Point Exceptions have been reported - 4,439,382,728 cycles # 2.813 GHz - 10,663,698,368 instructions # 2.40 insn per cycle - 1.578969286 seconds time elapsed + 4,440,179,427 cycles # 2.756 GHz + 10,663,032,994 instructions # 2.40 insn per cycle + 1.611644858 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4378) (512y: 91) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
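[Editor's sketch] The recurring "MEK (channelid array) processed 512 events across 16 channels" debug lines are internally consistent: channel 1 gets 64 events and channels 2-15 get 32 each, which adds up to the 512 processed events. A quick tally:

  // Sketch: sum the per-channel event counts printed by the MEK debug lines.
  #include <cstdio>
  #include <map>
  int main()
  {
    std::map<int, int> channelEvents{ { 1, 64 } };           // "{ 1 : 64, 2 : 32, ..., 15 : 32 }"
    for( int channel = 2; channel <= 15; channel++ ) channelEvents[channel] = 32;
    int total = 0;
    for( const auto& [channel, nevt] : channelEvents ) total += nevt; // C++17 structured binding
    std::printf( "total = %d\n", total ); // 512 = 64 + 14 * 32, matching "processed 512 events"
    return 0;
  }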
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416466 Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.352968e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.457289e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.457289e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.095641e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.193407e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.193407e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.250021 sec +TOTAL : 2.331432 sec INFO: No Floating Point Exceptions have been reported - 4,132,805,814 cycles # 1.834 GHz - 5,966,681,608 instructions # 1.44 insn per cycle - 2.254297849 seconds time elapsed + 4,128,663,715 cycles # 1.768 GHz + 5,965,561,050 instructions # 1.44 insn per cycle + 2.335809030 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1617) (512y: 95) (512z: 3577) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
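[Editor's sketch] The "-p 64 256 10" arguments of the check executables are grid parameters: 64 GPU-style blocks times 256 threads per block gives the nevt=16384 echoed in the Bridge "Set grid" messages, and, assuming the third argument is the iteration count, 10 iterations give 163840 events per run. The EvtsPerSec counters divide such event counts by internal sub-timers (random numbers plus momenta plus MEs for "(23)", matrix elements only for "(3)" and "(3a)"), which is why they come out somewhat higher than total events divided by the TOTAL wall time:

  // Sketch of the event bookkeeping behind "-p 64 256 10" (argument meaning assumed).
  #include <cstdio>
  int main()
  {
    const int gpublocks = 64, gputhreads = 256, niterations = 10;
    const int nevtPerIter = gpublocks * gputhreads; // 16384, as in "Set grid in Bridge (nevt=16384, ...)"
    std::printf( "events per iteration = %d\n", nevtPerIter );
    std::printf( "total events = %d\n", nevtPerIter * niterations ); // 163840
    return 0;
  }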
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt index 2c341e188f..555f99fae8 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd0_bridge.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,40 +11,40 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-01_23:43:30 +DATE: 2024-09-15_11:47:00 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -53,17 +53,17 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.550752e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.221480e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.221480e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.462205e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.092748e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.092748e+06 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.496080 sec +TOTAL : 0.504368 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 2,067,450,792 cycles # 2.916 GHz - 3,137,465,781 instructions # 1.52 insn per cycle - 0.766248980 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge + 2,053,925,551 cycles # 2.862 GHz + 3,120,835,610 instructions # 1.52 insn per cycle + 0.775288198 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) @@ -71,7 +71,7 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -80,18 +80,18 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.695395e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.206740e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.206740e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.695563e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.383097e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.383097e+06 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.824513 sec +TOTAL : 0.834031 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 3,173,134,566 cycles # 2.947 GHz - 5,085,043,151 instructions # 1.60 insn per cycle - 1.134769459 seconds time elapsed + 3,122,984,951 cycles # 2.884 GHz + 5,028,895,726 instructions # 1.61 insn per cycle + 1.144542739 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -99,35 +99,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 Avg ME (F77/GPU) = 1.4131213684418649 Relative difference = 4.469239988637851e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.485342e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.497625e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.497625e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.420974e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.433351e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.433351e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.619749 sec +TOTAL : 6.797187 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 19,931,458,281 cycles # 3.010 GHz - 59,921,540,931 instructions # 3.01 insn per cycle - 6.624128046 seconds time elapsed + 19,924,578,475 cycles # 2.930 GHz + 59,919,807,490 instructions # 3.01 insn per cycle + 6.801426045 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1199) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -135,33 +135,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.696388e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.739462e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.739462e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.519993e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.562146e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.562146e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.515800 sec +TOTAL : 3.653302 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 10,606,854,369 cycles # 3.014 GHz - 31,134,291,194 instructions # 2.94 insn per cycle - 3.520321812 seconds time elapsed + 10,746,732,815 cycles # 2.939 GHz + 31,134,499,346 instructions # 2.90 insn per cycle + 3.657616586 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5221) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -169,33 +169,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.249974e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.417505e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.417505e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.065515e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.233179e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.233179e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.799007 sec +TOTAL : 1.835810 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 5,036,417,695 cycles # 2.794 GHz - 11,456,150,189 instructions # 2.27 insn per cycle - 1.803522033 seconds time elapsed + 5,036,150,788 cycles # 2.739 GHz + 11,457,434,104 instructions # 2.28 insn per cycle + 1.839969686 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4642) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -203,33 +203,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416466 Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.043850e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.065311e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.065311e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.012086e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.033024e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.033024e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.596740 sec +TOTAL : 1.647583 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 4,500,129,968 cycles # 2.812 GHz - 10,714,925,901 instructions # 2.38 insn per cycle - 1.601086866 seconds time elapsed + 4,499,476,819 cycles # 2.725 GHz + 10,716,818,624 instructions # 2.38 insn per cycle + 1.651828196 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4378) (512y: 91) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -237,33 +237,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416466 Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.384293e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.494992e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.494992e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.046506e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.146814e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.146814e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.248261 sec +TOTAL : 2.355729 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 4,170,685,810 cycles # 1.852 GHz - 6,006,473,100 instructions # 1.44 insn per cycle - 2.252706319 seconds time elapsed + 4,171,753,505 cycles # 1.769 GHz + 6,006,835,350 instructions # 1.44 insn per cycle + 2.359914843 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1617) (512y: 95) (512z: 3577) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -271,8 +271,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416484 Relative difference = 4.469241520660492e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt index 3fb3a47ab7..df418c0c55 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_d_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be 
done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-01_23:16:48 +DATE: 2024-09-15_11:13:51 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.834088e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.998800e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.096289e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.819441e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.940165e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.036270e+06 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.471420 sec +TOTAL : 0.473171 sec INFO: No Floating Point Exceptions have been reported - 1,961,056,023 cycles # 2.850 GHz - 2,802,491,151 instructions # 1.43 insn per cycle - 0.745710565 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 + 1,973,905,564 cycles # 2.864 GHz + 2,835,389,936 instructions # 1.44 insn per cycle + 0.747859769 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.046967e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.218286e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.227874e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.045923e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.239053e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.249723e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.603989 sec +TOTAL : 0.613723 sec INFO: No Floating Point Exceptions have been reported - 2,497,495,736 cycles # 2.934 GHz - 3,810,103,118 instructions # 1.53 insn per cycle - 0.911018204 seconds time elapsed + 2,468,512,324 cycles # 2.879 GHz + 3,722,507,305 instructions # 1.51 insn per cycle + 0.915283019 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 Avg ME (F77/GPU) = 1.4131213684418649 Relative difference = 4.469239988637851e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.451702e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.463867e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.463867e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.416240e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.428350e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.428350e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.703085 sec +TOTAL : 6.802073 sec INFO: No Floating Point Exceptions have been reported - 19,927,403,339 cycles # 2.972 GHz - 60,128,261,455 instructions # 3.02 insn per cycle - 6.707279948 seconds time elapsed + 19,919,234,926 cycles # 2.929 GHz + 60,126,857,831 instructions # 3.02 insn per cycle + 6.806341598 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1322) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.705461e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.749260e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.749260e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.628115e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.671006e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.671006e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.501187 sec +TOTAL : 3.560395 sec INFO: No Floating Point Exceptions have been reported - 10,472,673,137 cycles # 2.988 GHz - 30,685,134,556 instructions # 2.93 insn per cycle - 3.505272548 seconds time elapsed + 10,470,027,689 cycles # 2.938 GHz + 30,685,175,745 instructions # 2.93 insn per cycle + 3.564357324 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5047) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684432433 Relative difference = 4.46923023397472e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.078604e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.240538e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.240538e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.858005e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.013532e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.013532e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.823923 sec +TOTAL : 1.870006 sec INFO: No Floating Point Exceptions have been reported - 5,123,559,158 cycles # 2.804 GHz - 11,838,671,516 instructions # 2.31 insn per cycle - 1.828124881 seconds time elapsed + 5,129,037,452 cycles # 2.738 GHz + 11,838,972,708 instructions # 2.31 insn per cycle + 1.873874088 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4748) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416466 Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.843118e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.002821e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.002821e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.652883e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.834044e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.834044e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.683810 sec +TOTAL : 1.717936 sec INFO: No Floating Point Exceptions have been reported - 4,722,608,139 cycles # 2.799 GHz - 11,163,576,715 instructions # 2.36 insn per cycle - 1.687961258 seconds time elapsed + 4,726,163,144 cycles # 2.747 GHz + 11,165,051,323 instructions # 2.36 insn per cycle + 1.721718897 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4403) (512y: 245) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213684416466 Relative difference = 4.469241533230934e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.277733e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.387064e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.387064e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.029308e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.126499e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.126499e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.272861 sec +TOTAL : 2.353551 sec INFO: No Floating Point Exceptions have been reported - 4,161,049,275 cycles # 1.828 GHz - 6,218,241,048 instructions # 1.49 insn per cycle - 2.277009938 seconds time elapsed + 4,165,348,623 cycles # 1.768 GHz + 6,220,012,480 instructions # 1.49 insn per cycle + 2.357450464 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1513) (512y: 140) (512z: 3679) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
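[Editor's sketch] Taken together, the five C++ backends in this d_inl0_hrd1 log show the expected SIMD scaling on this Xeon Silver 4216 host: relative to the scalar build, the MECalcOnly throughput grows by roughly 1.9x with sse4 (VECTOR[2]), 3.7x with avx2 (VECTOR[4]) and 4.0x with 512y, while 512z (VECTOR[8]) only reaches about 2.9x, consistent with the lower clock (~1.77 GHz) in its perf line. A minimal sketch of the ratios, with the throughputs copied from the log:

  // Sketch: SIMD speedups of MECalcOnly throughput vs the scalar 'none' build.
  #include <cstdio>
  int main()
  {
    struct Backend { const char* name; double evtsPerSec; };
    const Backend backends[] = { { "none", 2.428350e+04 }, { "sse4", 4.671006e+04 },
                                 { "avx2", 9.013532e+04 }, { "512y", 9.834044e+04 },
                                 { "512z", 7.126499e+04 } };
    const double scalar = backends[0].evtsPerSec;
    for( const auto& b : backends )
      std::printf( "%s: %.1fx vs scalar\n", b.name, b.evtsPerSec / scalar );
    return 0;
  }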
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
@@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.413122e+00
Avg ME (F77/C++) = 1.4131213684416484
Relative difference = 4.469241520660492e-07
diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt
index 34c4be0b14..0344b19ae4 100644
--- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0.txt
@@ -1,5 +1,5 @@
-Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg
+Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg
BACKEND=cpp512y (was cppauto)
OMPFLAGS=
FPTYPE='d'
@@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h
make: Nothing to be done for 'gtestlibs'.
make USEBUILDDIR=1 BACKEND=cuda
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
make USEBUILDDIR=1 BACKEND=cppnone
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
make USEBUILDDIR=1 BACKEND=cppsse4
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
make USEBUILDDIR=1 BACKEND=cppavx2
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
make USEBUILDDIR=1 BACKEND=cpp512z
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
-DATE: 2024-09-01_23:17:13
+DATE: 2024-09-15_11:14:17
On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 1.699848e+07 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.021832e+07 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.054206e+07 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.690436e+07 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.002102e+07 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.037933e+07 ) sec^-1
MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2
-TOTAL : 0.455899 sec
+TOTAL : 0.455498 sec
INFO: No Floating Point Exceptions have been reported
- 1,955,913,787 cycles # 2.919 GHz
- 2,787,706,714 instructions # 1.43 insn per cycle
- 0.727968230 seconds time elapsed
-runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1
+ 1,928,242,310 cycles # 2.870 GHz
+ 2,746,045,826 instructions # 1.42 insn per cycle
+ 0.728814382 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1
==PROF== Profiling "sigmaKin": launch__registers_per_thread 226
==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
.........................................................................
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 2.759047e+07 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.411217e+07 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.451705e+07 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.680469e+07 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.378510e+07 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.424836e+07 ) sec^-1
MeanMatrixElemValue = ( 6.630097e+02 +- 4.770717e+02 ) GeV^-2
-TOTAL : 0.502101 sec
+TOTAL : 0.509889 sec
INFO: No Floating Point Exceptions have been reported
- 2,122,727,370 cycles # 2.916 GHz
- 3,075,859,437 instructions # 1.45 insn per cycle
- 0.785300798 seconds time elapsed
+ 2,121,031,452 cycles # 2.862 GHz
+ 3,036,959,694 instructions # 1.43 insn per cycle
+ 0.800065199 seconds time elapsed
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
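Throughout these logs the -p arguments map onto the GPU grid: blocks, threads per block and iterations, with nevt = gpublocks * gputhreads per launch (the "Set grid in Bridge" warnings later in this diff make the product explicit). A small sketch of that bookkeeping, illustrative only:

  #include <cstdio>
  int main() {
    // "-p 2048 256 1" as in the run above: gpublocks, gputhreads, iterations (assumed meaning)
    const long gpublocks = 2048, gputhreads = 256, niter = 1;
    const long nevt = gpublocks * gputhreads; // events per iteration
    std::printf( "nevt=%ld per iteration, %ld events in total\n", nevt, nevt * niter ); // 524288
    return 0;
  }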
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
@@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2
Avg ME (C++/GPU) = 1.412607e+00
Avg ME (F77/GPU) = 1.4132214305330990
Relative difference = 0.0004349621183379836
OK (relative difference <= 5E-3)
=========================================================================
-Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe
+Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME] (23) = ( 2.554989e+04 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.568017e+04 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.568017e+04 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.504687e+04 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.517708e+04 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.517708e+04 ) sec^-1
MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2
-TOTAL : 6.430950 sec
+TOTAL : 6.560247 sec
INFO: No Floating Point Exceptions have been reported
- 19,259,058,983 cycles # 2.994 GHz
- 59,617,547,503 instructions # 3.10 insn per cycle
- 6.434961928 seconds time elapsed
+ 19,257,464,373 cycles # 2.934 GHz
+ 59,612,594,917 instructions # 3.10 insn per cycle
+ 6.564375492 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 959) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
@@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.412995e+00
Avg ME (F77/C++) = 1.4129949096991936
Relative difference = 6.390737857384068e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 8.187345e+04 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.324546e+04 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.324546e+04 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 8.084277e+04 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.218488e+04 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.218488e+04 ) sec^-1
MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2
-TOTAL : 2.018757 sec
+TOTAL : 2.043801 sec
INFO: No Floating Point Exceptions have been reported
- 6,013,596,656 cycles # 2.974 GHz
- 17,061,733,957 instructions # 2.84 insn per cycle
- 2.022888104 seconds time elapsed
+ 6,009,096,977 cycles # 2.936 GHz
+ 17,060,655,087 instructions # 2.84 insn per cycle
+ 2.047534449 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 5856) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
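The "Internal loops fptype_sv" lines follow directly from register width divided by the width of the floating-point type: 128-bit SSE4 packs 4 floats or 2 doubles, 256-bit AVX2 (and the 256-bit '512y' AVX512 mode) packs 8 or 4, and 512-bit '512z' packs 16 or 8, which matches the VECTOR[...] entries in these float logs and in the double-precision logs earlier in the diff. A sketch of that arithmetic, illustrative only:

  #include <cstdio>
  int main() {
    const int bits[] = { 128, 256, 512 };                // sse4, avx2/512y, 512z register widths
    const char* tag[] = { "sse4", "avx2/512y", "512z" };
    for( int i = 0; i < 3; ++i )
      std::printf( "%-9s FLOAT: VECTOR[%d]  DOUBLE: VECTOR[%d]\n",
                   tag[i], bits[i] / 32, bits[i] / 64 ); // 32-bit floats, 64-bit doubles
    return 0;
  }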
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
@@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.412995e+00
Avg ME (F77/C++) = 1.4129954647353316
Relative difference = 3.2890090308261873e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 1.775699e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.837851e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.837851e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.741509e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.801920e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.801920e+05 ) sec^-1
MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2
-TOTAL : 0.940369 sec
+TOTAL : 0.958794 sec
INFO: No Floating Point Exceptions have been reported
- 2,635,402,093 cycles # 2.792 GHz
- 6,187,325,534 instructions # 2.35 insn per cycle
- 0.944440901 seconds time elapsed
+ 2,632,796,186 cycles # 2.737 GHz
+ 6,187,347,650 instructions # 2.35 insn per cycle
+ 0.962496439 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5105) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
@@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.413313e+00
Avg ME (F77/C++) = 1.4133132969790267
Relative difference = 2.1012969292986113e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 1.922412e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.996476e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.996476e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.912269e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.986419e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.986419e+05 ) sec^-1
MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2
-TOTAL : 0.870203 sec
+TOTAL : 0.875039 sec
INFO: No Floating Point Exceptions have been reported
- 2,409,835,521 cycles # 2.759 GHz
- 5,791,929,833 instructions # 2.40 insn per cycle
- 0.874296773 seconds time elapsed
+ 2,407,469,182 cycles # 2.742 GHz
+ 5,790,784,602 instructions # 2.41 insn per cycle
+ 0.878768885 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4908) (512y: 36) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
@@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.413313e+00
Avg ME (F77/C++) = 1.4133132969790267
Relative difference = 2.1012969292986113e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 1.493996e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.538601e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.538601e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.443174e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.485583e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.485583e+05 ) sec^-1
MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2
-TOTAL : 1.116135 sec
+TOTAL : 1.155478 sec
INFO: No Floating Point Exceptions have been reported
- 2,074,614,881 cycles # 1.853 GHz
- 3,391,488,863 instructions # 1.63 insn per cycle
- 1.120258678 seconds time elapsed
+ 2,073,615,836 cycles # 1.790 GHz
+ 3,391,178,624 instructions # 1.64 insn per cycle
+ 1.159306518 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2235) (512y: 39) (512z: 3789)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
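As a rough cross-check, the EvtsPerSec counters are consistent with the event count implied by "-p 64 256 10" (64*256*10 = 163840 events) divided by the measured time; TOTAL also counts initialization, so the naive rate lands slightly below the EvtsPerSec[Rmb+ME] figure of ~1.44e+05 for the 512z run above. An illustrative sketch with the numbers copied from that run:

  #include <cstdio>
  int main() {
    const long nevt = 64L * 256L * 10L; // "-p 64 256 10": 163840 events
    const double total = 1.155478;      // TOTAL of the 512z float run above, includes setup
    std::printf( "~%.3e events/sec\n", nevt / total ); // ~1.42e+05, just below the ME-only timers
    return 0;
  }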
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
@@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.413316e+00
Avg ME (F77/C++) = 1.4133164033579249
Relative difference = 2.85398258307829e-07
diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt
index c0e91bec2f..93fdf05be3 100644
--- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt
+++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd0_bridge.txt
@@ -1,5 +1,5 @@
-Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg
+Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg
BACKEND=cpp512y (was cppauto)
OMPFLAGS=
FPTYPE='d'
@@ -11,40 +11,40 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h
make: Nothing to be done for 'gtestlibs'.
make USEBUILDDIR=1 BACKEND=cuda
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
make USEBUILDDIR=1 BACKEND=cppnone
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
make USEBUILDDIR=1 BACKEND=cppsse4
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
make USEBUILDDIR=1 BACKEND=cppavx2
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
make USEBUILDDIR=1 BACKEND=cpp512z
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
-DATE: 2024-09-01_23:43:55
+DATE: 2024-09-15_11:47:26
On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP=
WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -53,17 +53,17 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks
Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 4.587121e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.558472e+07 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.558472e+07 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 4.480682e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.545762e+07 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.545762e+07 ) sec^-1
MeanMatrixElemValue = ( 1.009071e+02 +- 5.002295e+01 ) GeV^-2
-TOTAL : 0.464282 sec
+TOTAL : 0.469335 sec
INFO: No Floating Point Exceptions have been reported
INFO: No Floating Point Exceptions have been reported
- 1,970,383,464 cycles # 2.910 GHz
- 2,905,584,118 instructions # 1.47 insn per cycle
- 0.733921324 seconds time elapsed
-runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge
+ 1,971,009,268 cycles # 2.861 GHz
+ 2,878,621,667 instructions # 1.46 insn per cycle
+ 0.747088963 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge
WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384)
@@ -71,7 +71,7 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks
==PROF== Profiling "sigmaKin": launch__registers_per_thread 226
==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
.........................................................................
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP=
WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -80,18 +80,18 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo
Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 4.517059e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.199084e+07 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.199084e+07 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 4.503732e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.296845e+07 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.296845e+07 ) sec^-1
MeanMatrixElemValue = ( 6.737499e+02 +- 4.776369e+02 ) GeV^-2
-TOTAL : 0.648992 sec
+TOTAL : 0.653804 sec
INFO: No Floating Point Exceptions have been reported
INFO: No Floating Point Exceptions have been reported
- 2,614,509,104 cycles # 2.934 GHz
- 3,983,626,905 instructions # 1.52 insn per cycle
- 0.949135243 seconds time elapsed
+ 2,546,264,462 cycles # 2.875 GHz
+ 3,884,000,523 instructions # 1.53 insn per cycle
+ 0.944357505 seconds time elapsed
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/runTest_cuda.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
@@ -99,35 +99,35 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2
Avg ME (C++/GPU) = 1.412607e+00
Avg ME (F77/GPU) = 1.4132214305330990
Relative difference = 0.0004349621183379836
OK (relative difference <= 5E-3)
=========================================================================
-Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe
+Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd0/check_hip.exe
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
WARNING! Instantiate host Bridge (nevt=16384)
Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME] (23) = ( 2.569854e+04 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.583003e+04 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.583003e+04 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.503119e+04 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.516211e+04 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.516211e+04 ) sec^-1
MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2
-TOTAL : 6.397429 sec
+TOTAL : 6.568401 sec
INFO: No Floating Point Exceptions have been reported
INFO: No Floating Point Exceptions have been reported
- 19,269,183,636 cycles # 3.011 GHz
- 59,618,162,881 instructions # 3.09 insn per cycle
- 6.401657275 seconds time elapsed
+ 19,288,292,034 cycles # 2.936 GHz
+ 59,615,397,281 instructions # 3.09 insn per cycle
+ 6.572330246 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 959) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
@@ -135,33 +135,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.412995e+00
Avg ME (F77/C++) = 1.4129949096991936
Relative difference = 6.390737857384068e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
WARNING! Instantiate host Bridge (nevt=16384)
Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 8.278035e+04 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.418496e+04 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.418496e+04 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 8.074244e+04 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.211619e+04 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.211619e+04 ) sec^-1
MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2
-TOTAL : 2.000831 sec
+TOTAL : 2.051717 sec
INFO: No Floating Point Exceptions have been reported
INFO: No Floating Point Exceptions have been reported
- 6,033,592,364 cycles # 3.010 GHz
- 17,110,022,408 instructions # 2.84 insn per cycle
- 2.004963711 seconds time elapsed
+ 6,032,564,255 cycles # 2.936 GHz
+ 17,108,905,426 instructions # 2.84 insn per cycle
+ 2.055577630 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 5856) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
@@ -169,33 +169,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.412995e+00
Avg ME (F77/C++) = 1.4129954647353316
Relative difference = 3.2890090308261873e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
WARNING! Instantiate host Bridge (nevt=16384)
Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 1.785493e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.848445e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.848445e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.735664e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.796597e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.796597e+05 ) sec^-1
MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2
-TOTAL : 0.939049 sec
+TOTAL : 0.965943 sec
INFO: No Floating Point Exceptions have been reported
INFO: No Floating Point Exceptions have been reported
- 2,652,412,872 cycles # 2.814 GHz
- 6,224,549,306 instructions # 2.35 insn per cycle
- 0.943195146 seconds time elapsed
+ 2,652,269,080 cycles # 2.742 GHz
+ 6,224,274,753 instructions # 2.35 insn per cycle
+ 0.969710233 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5105) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
@@ -203,33 +203,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.413313e+00
Avg ME (F77/C++) = 1.4133132969790267
Relative difference = 2.1012969292986113e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
WARNING! Instantiate host Bridge (nevt=16384)
Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 1.876448e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.951702e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.951702e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.901208e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.974602e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.974602e+05 ) sec^-1
MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2
-TOTAL : 0.896445 sec
+TOTAL : 0.883566 sec
INFO: No Floating Point Exceptions have been reported
INFO: No Floating Point Exceptions have been reported
- 2,434,305,262 cycles # 2.705 GHz
- 5,828,516,984 instructions # 2.39 insn per cycle
- 0.900793892 seconds time elapsed
+ 2,424,412,243 cycles # 2.734 GHz
+ 5,827,930,388 instructions # 2.40 insn per cycle
+ 0.887425140 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4908) (512y: 36) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
@@ -237,33 +237,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.413313e+00
Avg ME (F77/C++) = 1.4133132969790267
Relative difference = 2.1012969292986113e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
WARNING! Instantiate host Bridge (nevt=16384)
Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 1.434857e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.477996e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.477996e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.438672e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.481527e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.481527e+05 ) sec^-1
MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2
-TOTAL : 1.167264 sec
+TOTAL : 1.163548 sec
INFO: No Floating Point Exceptions have been reported
INFO: No Floating Point Exceptions have been reported
- 2,096,069,854 cycles # 1.790 GHz
- 3,433,044,253 instructions # 1.64 insn per cycle
- 1.171777259 seconds time elapsed
+ 2,093,109,210 cycles # 1.794 GHz
+ 3,432,132,802 instructions # 1.64 insn per cycle
+ 1.167531871 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2235) (512y: 39) (512z: 3789)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 }
@@ -271,8 +271,8 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.413316e+00
Avg ME (F77/C++) = 1.4133164033579249
Relative difference = 2.85398258307829e-07
diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt
index ae4211ca18..04fc107fbb 100644
--- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_f_inl0_hrd1.txt
@@ -1,5 +1,5 @@
-Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg
+Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg
BACKEND=cpp512y (was cppauto)
OMPFLAGS=
FPTYPE='d'
@@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h
make: Nothing to be done for 'gtestlibs'.
make USEBUILDDIR=1 BACKEND=cuda
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
make USEBUILDDIR=1 BACKEND=cppnone
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
make USEBUILDDIR=1 BACKEND=cppsse4
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
make USEBUILDDIR=1 BACKEND=cppavx2
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
make USEBUILDDIR=1 BACKEND=cpp512z
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-01_23:17:34 +DATE: 2024-09-15_11:14:38 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.749046e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.097285e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.132508e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.693098e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.040838e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.074373e+07 ) sec^-1 MeanMatrixElemValue = ( 1.008472e+02 +- 5.002447e+01 ) GeV^-2 -TOTAL : 0.453093 sec +TOTAL : 0.456865 sec INFO: No Floating Point Exceptions have been reported - 1,937,831,306 cycles # 2.897 GHz - 2,760,390,705 instructions # 1.42 insn per cycle - 0.725917028 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 + 1,922,600,682 cycles # 2.857 GHz + 2,735,771,538 instructions # 1.42 insn per cycle + 0.729676644 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 226 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.753006e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.403951e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.444556e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.681790e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.366127e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.406124e+07 ) sec^-1 MeanMatrixElemValue = ( 6.630097e+02 +- 4.770717e+02 ) GeV^-2 -TOTAL : 0.503839 sec +TOTAL : 0.509422 sec INFO: No Floating Point Exceptions have been reported - 2,122,411,110 cycles # 2.908 GHz - 3,090,140,496 instructions # 1.46 insn per cycle - 0.788810373 seconds time elapsed + 2,117,565,229 cycles # 2.869 GHz + 3,056,275,302 instructions # 1.44 insn per cycle + 0.796290434 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.412607e+00 Avg ME (F77/GPU) = 1.4132214305330990 Relative difference = 0.0004349621183379836 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.532382e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.545218e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.545218e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.491972e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.504565e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.504565e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 6.488008 sec +TOTAL : 6.593721 sec INFO: No Floating Point Exceptions have been reported - 19,400,700,603 cycles # 2.989 GHz - 59,351,001,772 instructions # 3.06 insn per cycle - 6.491955397 seconds time elapsed + 19,401,378,848 cycles # 2.941 GHz + 59,351,233,195 instructions # 3.06 insn per cycle + 6.597810534 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1027) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.412995e+00 Avg ME (F77/C++) = 1.4129949096991936 Relative difference = 6.390737857384068e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.590618e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.739335e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.739335e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.427450e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.574205e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.574205e+04 ) sec^-1 MeanMatrixElemValue = ( 1.009236e+02 +- 5.002643e+01 ) GeV^-2 -TOTAL : 1.924170 sec +TOTAL : 1.961524 sec INFO: No Floating Point Exceptions have been reported - 5,757,417,868 cycles # 2.987 GHz - 16,849,627,882 instructions # 2.93 insn per cycle - 1.928257529 seconds time elapsed + 5,763,417,063 cycles # 2.934 GHz + 16,848,552,420 instructions # 2.92 insn per cycle + 1.965663621 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5611) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.412995e+00 Avg ME (F77/C++) = 1.4129954647353316 Relative difference = 3.2890090308261873e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.552185e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.598756e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.598756e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.513418e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.559668e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.559668e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.073080 sec +TOTAL : 1.100420 sec INFO: No Floating Point Exceptions have been reported - 3,016,650,359 cycles # 2.803 GHz - 6,848,903,593 instructions # 2.27 insn per cycle - 1.077145956 seconds time elapsed + 3,014,454,268 cycles # 2.733 GHz + 6,847,622,992 instructions # 2.27 insn per cycle + 1.104094178 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5735) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413313e+00 Avg ME (F77/C++) = 1.4133132969790267 Relative difference = 2.1012969292986113e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.683535e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.739951e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.739951e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.641246e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.695747e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.695747e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008857e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 0.990408 sec +TOTAL : 1.015943 sec INFO: No Floating Point Exceptions have been reported - 2,793,689,730 cycles # 2.811 GHz - 6,437,259,695 instructions # 2.30 insn per cycle - 0.994438549 seconds time elapsed + 2,793,517,683 cycles # 2.742 GHz + 6,436,907,448 instructions # 2.30 insn per cycle + 1.019630864 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5509) (512y: 22) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413313e+00 Avg ME (F77/C++) = 1.4133132969790267 Relative difference = 2.1012969292986113e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.337472e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.374658e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.374658e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.322600e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.358678e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.358678e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008856e+02 +- 5.002468e+01 ) GeV^-2 -TOTAL : 1.245257 sec +TOTAL : 1.258576 sec INFO: No Floating Point Exceptions have been reported - 2,255,500,318 cycles # 1.807 GHz - 3,755,716,500 instructions # 1.67 insn per cycle - 1.249336626 seconds time elapsed + 2,248,626,373 cycles # 1.783 GHz + 3,754,168,834 instructions # 1.67 insn per cycle + 1.262333902 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2466) (512y: 29) (512z: 4084) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413316e+00 Avg ME (F77/C++) = 1.4133164033579249 Relative difference = 2.85398258307829e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt index f3f296c323..2641b6a6f8 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-01_23:17:55 +DATE: 2024-09-15_11:14:59 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.829779e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.992225e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.088067e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.553984e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.813684e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.925982e+06 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.474231 sec +TOTAL : 0.473727 sec INFO: No Floating Point Exceptions have been reported - 1,991,666,593 cycles # 2.889 GHz - 2,862,009,939 instructions # 1.44 insn per cycle - 0.748188484 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 + 1,964,125,217 cycles # 2.842 GHz + 2,850,802,933 instructions # 1.45 insn per 
cycle + 0.747533169 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.052360e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.225527e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.235296e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.039046e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.224514e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.235497e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.605817 sec +TOTAL : 0.623745 sec INFO: No Floating Point Exceptions have been reported - 2,459,433,870 cycles # 2.917 GHz - 3,767,298,634 instructions # 1.53 insn per cycle - 0.901915026 seconds time elapsed + 2,491,113,981 cycles # 2.884 GHz + 3,741,355,868 instructions # 1.50 insn per cycle + 0.924633822 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 Avg ME (F77/GPU) = 1.4131213755569487 Relative difference = 4.418889885423659e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.430484e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.442316e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.442316e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.390069e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.401784e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.401784e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.761655 sec +TOTAL : 6.876043 sec INFO: No Floating Point Exceptions have been reported - 20,208,420,881 cycles # 2.987 GHz - 60,948,278,883 instructions # 3.02 insn per cycle - 6.765683895 seconds time elapsed + 20,176,006,103 cycles # 2.934 GHz + 60,944,588,650 instructions # 3.02 insn per cycle + 6.880217907 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1220) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213859069593 Relative difference = 4.345647726386255e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.712730e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.755397e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.755397e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.624106e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.667122e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.667122e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.496170 sec +TOTAL : 3.563355 sec INFO: No Floating Point Exceptions have been reported - 10,468,850,784 cycles # 2.992 GHz - 30,820,867,995 instructions # 2.94 insn per cycle - 3.500304414 seconds time elapsed + 10,467,283,500 cycles # 2.935 GHz + 30,820,693,493 instructions # 2.94 insn per cycle + 3.567171047 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5351) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213792564823 Relative difference = 4.392710025734405e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.398049e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.568353e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.568353e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.172379e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.336577e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.336577e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.763076 sec +TOTAL : 1.806966 sec INFO: No Floating Point Exceptions have been reported - 4,951,526,153 cycles # 2.803 GHz - 11,358,371,552 instructions # 2.29 insn per cycle - 1.767298886 seconds time elapsed + 4,954,879,411 cycles # 2.737 GHz + 11,359,422,816 instructions # 2.29 insn per cycle + 1.810872816 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4776) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.064444e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.086290e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.086290e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.036379e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.057513e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.057513e+05 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.558863 sec +TOTAL : 1.601666 sec INFO: No Floating Point Exceptions have been reported - 4,376,598,291 cycles # 2.801 GHz - 10,608,469,504 instructions # 2.42 insn per cycle - 1.562988882 seconds time elapsed + 4,380,983,099 cycles # 2.729 GHz + 10,610,165,712 instructions # 2.42 insn per cycle + 1.605990710 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4503) (512y: 83) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.134020e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.230854e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.230854e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.900446e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.995461e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.995461e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.318741 sec +TOTAL : 2.397174 sec INFO: No Floating Point Exceptions have been reported - 4,242,203,859 cycles # 1.827 GHz - 6,164,589,981 instructions # 1.45 insn per cycle - 2.323012442 seconds time elapsed + 4,245,323,919 cycles # 1.769 GHz + 6,166,210,089 instructions # 1.45 insn per cycle + 2.401100901 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2140) (512y: 117) (512z: 3653) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213786174055 Relative difference = 4.3972324717191576e-07 diff --git a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt index e346aeab23..0766319c3b 100644 --- a/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttg_mad/log_ggttg_mad_m_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg' -DATE: 2024-09-01_23:18:20 +DATE: 2024-09-15_11:15:25 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.798237e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.966504e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.065410e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.556212e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.906743e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.026143e+06 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 0.466648 sec +TOTAL : 0.472306 sec INFO: No Floating Point Exceptions have been reported - 2,016,435,863 cycles # 2.923 GHz - 2,910,161,803 instructions # 1.44 insn per cycle - 0.746914415 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 + 1,982,520,983 cycles # 2.874 GHz + 2,863,074,866 instructions # 1.44 insn per 
cycle + 0.745845869 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.060855e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.237227e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.247098e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.042838e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.231665e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.242155e+07 ) sec^-1 MeanMatrixElemValue = ( 6.734461e+02 +- 4.775415e+02 ) GeV^-2 -TOTAL : 0.600780 sec +TOTAL : 0.611235 sec INFO: No Floating Point Exceptions have been reported - 2,462,295,609 cycles # 2.931 GHz - 3,758,168,227 instructions # 1.53 insn per cycle - 0.898777357 seconds time elapsed + 2,455,829,243 cycles # 2.879 GHz + 3,741,729,771 instructions # 1.52 insn per cycle + 0.912428146 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.413122e+00 Avg ME (F77/GPU) = 1.4131213755569487 Relative difference = 4.418889885423659e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 2.423886e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.435676e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.435676e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.386940e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.398394e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.398394e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 6.780253 sec +TOTAL : 6.884140 sec INFO: No Floating Point Exceptions have been reported - 20,292,808,666 cycles # 2.992 GHz - 61,172,531,376 instructions # 3.01 insn per cycle - 6.784583027 seconds time elapsed + 20,272,568,697 cycles # 2.944 GHz + 61,168,730,148 instructions # 3.02 insn per cycle + 6.888274413 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1272) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: 
FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213859069593 Relative difference = 4.345647726386255e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.781460e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.827050e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.827050e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.669440e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.713215e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.713215e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 3.445935 sec +TOTAL : 3.529000 sec INFO: No Floating Point Exceptions have been reported - 10,330,923,503 cycles # 2.995 GHz - 30,532,479,349 instructions # 2.96 insn per cycle - 3.450093687 seconds time elapsed + 10,335,535,502 cycles # 2.926 GHz + 30,533,410,675 instructions # 2.95 insn per cycle + 3.532867905 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 5155) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213792564823 Relative difference = 4.392710025734405e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.989697e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.145947e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.145947e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.803371e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.957146e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.957146e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.842100 sec +TOTAL : 1.880842 sec INFO: No Floating Point Exceptions have been reported - 5,140,656,602 cycles # 2.786 GHz - 11,872,353,123 instructions # 2.31 insn per cycle - 1.846234141 seconds time elapsed + 5,141,108,977 cycles # 2.729 GHz + 11,871,626,607 instructions # 2.31 insn per cycle + 1.885060685 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4887) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.988303e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.018035e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.018035e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.734351e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.920231e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.920231e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 1.659869 sec +TOTAL : 1.703697 sec INFO: No Floating Point Exceptions have been reported - 4,675,363,554 cycles # 2.811 GHz - 11,165,824,588 instructions # 2.39 insn per cycle - 1.664003607 seconds time elapsed + 4,677,605,202 cycles # 2.740 GHz + 11,166,557,237 instructions # 2.39 insn per cycle + 1.707597039 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4508) (512y: 238) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213600217192 Relative difference = 4.5288254008796884e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.142390e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.238965e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.238965e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.863155e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.956338e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.956338e+04 ) sec^-1 MeanMatrixElemValue = ( 1.008920e+02 +- 5.001681e+01 ) GeV^-2 -TOTAL : 2.315448 sec +TOTAL : 2.409053 sec INFO: No Floating Point Exceptions have been reported - 4,256,816,398 cycles # 1.836 GHz - 6,405,066,584 instructions # 1.50 insn per cycle - 2.319602248 seconds time elapsed + 4,255,960,621 cycles # 1.764 GHz + 6,404,237,522 instructions # 1.50 insn per cycle + 2.413297760 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2036) (512y: 163) (512z: 3731) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 16 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 16 channels { 1 : 64, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttg.mad/SubProcesses/P1_gg_ttxg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.413122e+00 Avg ME (F77/C++) = 1.4131213786174055 Relative difference = 4.3972324717191576e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt index b16495d432..5f3726dcea 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-01_23:18:46 +DATE: 2024-09-15_11:15:51 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.315060e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.337634e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.339238e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.308012e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.334511e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.336215e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.529607 sec +TOTAL : 0.533591 sec INFO: No Floating Point Exceptions have been reported - 2,229,815,269 cycles # 2.915 GHz - 3,529,273,827 instructions # 1.58 insn per cycle - 0.824163869 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 + 2,205,805,938 cycles # 2.869 GHz + 
3,444,884,387 instructions # 1.56 insn per cycle + 0.825307966 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.140673e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.168228e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.169384e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.131073e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.161174e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.162405e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.031789 sec +TOTAL : 3.048991 sec INFO: No Floating Point Exceptions have been reported - 9,850,142,485 cycles # 2.985 GHz - 22,204,433,056 instructions # 2.25 insn per cycle - 3.356282362 seconds time elapsed + 9,688,028,273 cycles # 2.924 GHz + 22,036,541,373 instructions # 2.27 insn per cycle + 3.369850770 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266731198158133E-004 Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.910726e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.911643e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.911643e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.879946e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.880868e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.880868e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.589540 sec +TOTAL : 8.730075 sec INFO: No Floating Point Exceptions have been reported - 25,627,728,244 cycles # 2.983 GHz - 78,955,696,602 instructions # 3.08 insn per cycle - 8.593772191 seconds time elapsed + 25,643,153,835 cycles # 2.937 GHz + 78,954,437,611 instructions # 3.08 insn per cycle + 8.734432118 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.589854e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.593135e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.593135e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.520374e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.523613e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.523613e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.574578 sec +TOTAL : 4.665332 sec INFO: No Floating Point Exceptions have been reported - 13,060,893,586 cycles # 2.853 GHz - 39,558,722,366 instructions # 3.03 insn per cycle - 4.578801220 seconds time elapsed + 13,099,128,105 cycles # 2.806 GHz + 39,559,591,481 instructions # 3.02 insn per cycle + 4.669271517 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.198901e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.216222e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.216222e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.059011e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.075081e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.075081e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.006726 sec +TOTAL : 2.042413 sec INFO: No Floating Point Exceptions have been reported - 5,614,026,038 cycles # 2.793 GHz - 13,824,406,079 instructions # 2.46 insn per cycle - 2.010944818 seconds time elapsed + 5,610,747,752 cycles # 2.743 GHz + 13,824,504,616 instructions # 2.46 insn per cycle + 2.046398223 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.359309e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.380862e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.380862e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.162703e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.184308e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.184308e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.758680 sec +TOTAL : 1.796726 sec INFO: No Floating Point Exceptions have been reported - 4,919,997,756 cycles # 2.792 GHz - 12,505,088,358 instructions # 2.54 insn per cycle - 1.762772156 seconds time elapsed + 4,922,237,700 cycles # 2.735 GHz + 12,506,994,545 instructions # 2.54 insn per cycle + 1.800589813 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 88) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.179262e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.192439e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.192439e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.982168e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.994583e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.994583e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.290855 sec +TOTAL : 2.355452 sec INFO: No Floating Point Exceptions have been reported - 4,141,477,668 cycles # 1.805 GHz - 6,391,349,441 instructions # 1.54 insn per cycle - 2.295092021 seconds time elapsed + 4,140,123,386 cycles # 1.756 GHz + 6,390,153,387 instructions # 1.54 insn per cycle + 2.359734916 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1975) (512y: 102) (512z: 9386) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt index 0e41cfa3ce..1d93db579b 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_bridge.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,40 +11,40 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-01_23:44:42 +DATE: 2024-09-15_11:48:14 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -53,17 +53,17 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.989533e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.281981e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.281981e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.969430e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.268357e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.268357e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.522033 sec +TOTAL : 0.523555 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 2,202,251,491 cycles # 2.927 GHz - 3,503,930,284 instructions # 1.59 insn per cycle - 0.813886529 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge + 2,166,841,175 cycles # 2.867 GHz + 3,453,451,458 instructions # 1.59 insn per cycle + 0.814918597 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) @@ -71,7 +71,7 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -80,18 +80,18 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.635230e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.122249e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.122249e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.613032e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.091578e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.091578e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.305275 sec +TOTAL : 3.317499 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 10,626,401,811 cycles # 2.970 GHz - 22,283,600,258 instructions # 2.10 insn per cycle - 3.635886705 seconds time elapsed + 10,422,723,136 cycles # 2.898 GHz + 15,879,167,379 instructions # 1.52 insn per cycle + 3.658545225 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -99,35 +99,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266731198158133E-004 Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= 
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.911101e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.912003e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.912003e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.878765e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.879684e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.879684e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.591953 sec +TOTAL : 8.741583 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 25,658,745,754 cycles # 2.985 GHz - 78,961,778,827 instructions # 3.08 insn per cycle - 8.596346628 seconds time elapsed + 25,666,397,830 cycles # 2.935 GHz + 78,965,262,045 instructions # 3.08 insn per cycle + 8.745862119 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -135,33 +135,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.579531e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.582836e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.582836e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.560977e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.564368e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.564368e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.591842 sec +TOTAL : 4.617447 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 13,090,469,361 cycles # 2.849 GHz - 39,572,230,574 instructions # 3.02 insn per cycle - 4.596399381 seconds time elapsed + 13,088,026,122 cycles # 2.833 GHz + 39,572,731,788 instructions # 3.02 insn per cycle + 4.621932955 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -169,33 +169,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.191077e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.208104e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.208104e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.016507e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.032941e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.032941e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.012743 sec +TOTAL : 2.058059 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 5,624,310,938 cycles # 2.789 GHz - 13,834,627,183 instructions # 2.46 insn per cycle - 2.017253922 seconds time elapsed + 5,631,279,447 cycles # 2.732 GHz + 13,836,775,240 instructions # 2.46 insn per cycle + 2.062638485 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -203,33 +203,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.291143e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.314088e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.314088e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.172752e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.195878e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.195878e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.775535 sec +TOTAL : 1.800276 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 4,937,092,344 cycles # 2.775 GHz - 12,517,222,540 instructions # 2.54 insn per cycle - 1.779883373 seconds time elapsed + 4,940,734,767 cycles # 2.740 GHz + 12,518,660,568 instructions # 2.53 insn per cycle + 1.804734715 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 88) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -237,33 +237,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.075955e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.088365e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.088365e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.912888e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.925297e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.925297e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.328033 sec +TOTAL : 2.384873 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 4,154,862,662 cycles # 1.782 GHz - 6,403,687,293 instructions # 1.54 insn per cycle - 2.332579074 seconds time elapsed + 4,161,817,801 cycles # 1.743 GHz + 6,405,054,448 instructions # 1.54 insn per cycle + 2.389410885 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1975) (512y: 102) (512z: 9386) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -271,8 +271,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt index c2df789ac1..fc2e4b7aa0 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_common.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-01_23:55:28 +DATE: 2024-09-15_11:59:16 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.330394e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.354681e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.356479e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.322702e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.346002e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.347615e+05 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 0.516014 sec +TOTAL : 0.517480 sec INFO: No Floating Point Exceptions have been reported - 2,169,057,551 cycles # 2.913 GHz - 3,493,264,432 instructions # 1.61 insn per cycle - 0.806582698 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --common + 2,154,192,085 cycles # 2.875 GHz + 3,384,532,263 instructions # 1.57 insn per cycle + 0.808566781 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
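Throughout these logs each executable first reports "The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW". A minimal standalone sketch of how such traps can be enabled on Linux/glibc follows; this is an illustration only, not the cudacpp source, and feenableexcept is a GNU extension.

#include <fenv.h> // feenableexcept (GNU extension, Linux/glibc only)
#include <cstdio>

int main()
{
  // Trap the same three exceptions that these logs report as fatal:
  // a division by zero, invalid operation or overflow then raises SIGFPE.
  feenableexcept( FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW );
  volatile double zero = 0;
  std::printf( "1/1 = %f\n", 1. / 1. ); // fine
  //std::printf( "1/0 = %f\n", 1. / zero ); // uncomment to abort with SIGFPE
  (void)zero;
  return 0;
}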
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.127429e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.159380e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.160647e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.137866e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.167359e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.168584e+05 ) sec^-1 MeanMatrixElemValue = ( 1.252232e+02 +- 1.234346e+02 ) GeV^-4 -TOTAL : 3.140428 sec +TOTAL : 3.158061 sec INFO: No Floating Point Exceptions have been reported - 10,120,833,598 cycles # 2.979 GHz - 21,427,031,047 instructions # 2.12 insn per cycle - 3.454066352 seconds time elapsed + 9,917,250,541 cycles # 2.905 GHz + 22,199,780,027 instructions # 2.24 insn per cycle + 3.469925481 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
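The "-p <blocks> <threads> <iterations>" arguments in these commands fix the work size: the number of events per run is their product, which is why the "-p 64 256 1" runs report "nevt=16384". A small sketch of the arithmetic, using only numbers already printed in this log (the TOTAL of the first scalar 'none' bridge run above):

#include <cstdio>

int main()
{
  // "-p 64 256 1" = 64 blocks x 256 threads x 1 iteration
  const int nevt = 64 * 256 * 1; // 16384, matching "nevt=16384" above
  const double totalSec = 8.741583; // TOTAL of the scalar 'none' run above
  // TOTAL includes setup, so nevt/TOTAL is only a lower bound on the ME rate;
  // the printed EvtsPerSec (~1.88e3) uses internal timers around the ME loop.
  std::printf( "nevt = %d, nevt/TOTAL = %.3e events/s\n", nevt, nevt / totalSec ); // ~1.87e3
  return 0;
}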
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266731198158133E-004 Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.903992e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.904921e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.904921e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.876744e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.877706e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.877706e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 8.621406 sec +TOTAL : 8.747062 sec INFO: No Floating Point Exceptions have been reported - 25,663,807,868 cycles # 2.976 GHz - 78,956,902,054 instructions # 3.08 insn per cycle - 8.627193724 seconds time elapsed + 25,648,612,426 cycles # 2.931 GHz + 78,952,780,288 instructions # 3.08 insn per cycle + 8.750949596 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating 
Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.624340e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.627627e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.627627e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.516490e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.519795e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.519795e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 4.532651 sec +TOTAL : 4.672214 sec INFO: No Floating Point Exceptions have been reported - 13,057,282,573 cycles # 2.879 GHz - 39,558,198,359 instructions # 3.03 insn per cycle - 4.536719035 seconds time elapsed + 13,064,181,413 cycles # 2.795 GHz + 39,557,975,845 instructions # 3.03 insn per cycle + 4.676162125 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
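Each backend's average matrix element is cross-checked against the Fortran (F77) reference, with the pass criterion printed as "OK (relative difference <= 5E-3)". The sketch below reproduces the logged digits; normalizing by the C++ value is an assumption that happens to match the printed number:

#include <cmath>
#include <cstdio>

int main()
{
  const double meCpp = 6.626675e-04;           // Avg ME (C++/C++) above
  const double meF77 = 6.6266731198141133e-04; // Avg ME (F77/C++) above
  const double relDiff = std::fabs( meCpp - meF77 ) / std::fabs( meCpp );
  std::printf( "Relative difference = %.16e\n", relDiff ); // ~2.8372990776e-07
  std::printf( "%s\n", relDiff <= 5e-3 ? "OK (relative difference <= 5E-3)" : "ERROR" );
  return 0;
}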
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.204978e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.221960e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.221960e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.035027e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.051957e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.051957e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.006568 sec +TOTAL : 2.050050 sec INFO: No Floating Point Exceptions have been reported - 5,618,687,412 cycles # 2.795 GHz - 13,822,855,763 instructions # 2.46 insn per cycle - 2.010749099 seconds time elapsed + 5,620,854,320 cycles # 2.738 GHz + 13,824,518,317 instructions # 2.46 insn per cycle + 2.054209584 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
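The "Internal loops fptype_sv" line identifies the SIMD width of each build: SCALAR for 'none', VECTOR[2] for 'sse4' (128-bit), VECTOR[4] for 'avx2' and '512y' (256-bit), and VECTOR[8] for '512z' (512-bit). Below is a minimal sketch of how such fixed-width double vectors can be declared with the GCC/clang vector_size extension; the actual cudacpp type definitions may differ in detail.

#include <cstdio>

typedef double fptype;
typedef fptype fptype_v2 __attribute__( ( vector_size( 16 ) ) ); // 128-bit: 2 doubles ('sse4')
typedef fptype fptype_v4 __attribute__( ( vector_size( 32 ) ) ); // 256-bit: 4 doubles ('avx2'/'512y')
typedef fptype fptype_v8 __attribute__( ( vector_size( 64 ) ) ); // 512-bit: 8 doubles ('512z')

int main()
{
  fptype_v4 a = { 1, 2, 3, 4 }, b = { 10, 20, 30, 40 };
  fptype_v4 c = a + b; // one 256-bit add processes four events in lockstep
  for( int i = 0; i < 4; i++ ) std::printf( "%g\n", c[i] );
  return 0;
}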
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.344898e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.366603e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.366603e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.147098e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.168017e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.168017e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 1.762820 sec +TOTAL : 1.801905 sec INFO: No Floating Point Exceptions have been reported - 4,933,693,544 cycles # 2.794 GHz - 12,504,027,488 instructions # 2.53 insn per cycle - 1.766918683 seconds time elapsed + 4,929,552,945 cycles # 2.731 GHz + 12,503,971,607 instructions # 2.54 insn per cycle + 1.805808510 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 88) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
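The perf-style counters printed after each run are internally consistent: instructions divided by cycles gives the "insn per cycle" figure, and cycles divided by the elapsed time approximates the reported clock (perf normalizes by task-clock rather than wall-clock, so the last digit can differ). A check against the 512y '--common' numbers above:

#include <cstdio>

int main()
{
  // Counters copied from the 512y '--common' run above.
  const double cycles = 4929552945.0;
  const double instructions = 12503971607.0;
  const double seconds = 1.805808510;
  std::printf( "IPC   = %.2f insn per cycle\n", instructions / cycles ); // ~2.54
  std::printf( "clock = %.3f GHz\n", cycles / seconds / 1e9 );           // ~2.73
  return 0;
}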
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.216455e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.229143e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.229143e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.940588e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.953237e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.953237e+03 ) sec^-1 MeanMatrixElemValue = ( 4.197467e-01 +- 3.250467e-01 ) GeV^-4 -TOTAL : 2.280566 sec +TOTAL : 2.372033 sec INFO: No Floating Point Exceptions have been reported - 4,150,319,540 cycles # 1.817 GHz - 6,389,476,563 instructions # 1.54 insn per cycle - 2.284688942 seconds time elapsed + 4,149,036,914 cycles # 1.747 GHz + 6,390,952,192 instructions # 1.54 insn per cycle + 2.375889237 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1975) (512y: 102) (512z: 9386) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
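Note that the 8-wide '512z' build is slower than the 4-wide '512y' build even though it retires roughly half the instructions: the measured clock drops from ~2.73 GHz to ~1.75 GHz, consistent with the well-known AVX-512 frequency reduction on this class of Xeon. The ratios, computed from the two '--common' runs above:

#include <cstdio>

int main()
{
  // EvtsPerSec[MatrixElems] and measured clocks from the runs above.
  const double rate512y = 9.168017e+03, ghz512y = 2.731;
  const double rate512z = 6.953237e+03, ghz512z = 1.747;
  std::printf( "throughput 512z/512y = %.2f\n", rate512z / rate512y ); // ~0.76
  std::printf( "clock      512z/512y = %.2f\n", ghz512z / ghz512y );   // ~0.64
  // per-cycle work is higher for 512z (~1.19x); the clock penalty dominates
  std::printf( "per-cycle  512z/512y = %.2f\n", ( rate512z / ghz512z ) / ( rate512y / ghz512y ) );
  return 0;
}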
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt index d0233b9398..9898610236 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_curhst.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-01_23:52:39 +DATE: 2024-09-15_11:56:23 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.318416e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.342529e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.344516e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.313175e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.336829e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.338592e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.514493 sec +TOTAL : 0.516520 sec INFO: No Floating Point Exceptions have been reported - 2,165,328,295 cycles # 2.912 GHz - 3,440,104,971 instructions # 1.59 insn per cycle - 0.805805306 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst + 2,145,509,423 
cycles # 2.867 GHz + 3,354,996,461 instructions # 1.56 insn per cycle + 0.808722776 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.134192e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.166205e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.167551e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.143109e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.171681e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.172874e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.076661 sec +TOTAL : 3.078670 sec INFO: No Floating Point Exceptions have been reported - 9,913,932,835 cycles # 2.974 GHz - 22,782,867,940 instructions # 2.30 insn per cycle - 3.391164141 seconds time elapsed + 9,691,265,584 cycles # 2.906 GHz + 21,333,506,403 instructions # 2.20 insn per cycle + 3.390410721 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266731198158133E-004 Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.920273e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.921234e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.921234e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.881675e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.882585e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.882585e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.546594 sec +TOTAL : 8.722848 sec INFO: No Floating Point Exceptions have been reported - 25,646,311,324 cycles # 3.000 GHz - 78,960,017,472 instructions # 3.08 insn per cycle - 8.550676592 seconds time elapsed + 25,651,425,779 cycles # 2.940 GHz + 78,952,412,711 instructions # 3.08 insn per cycle + 8.726726338 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating 
Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.594154e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.597374e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.597374e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.517566e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.520849e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.520849e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.568989 sec +TOTAL : 4.669337 sec INFO: No Floating Point Exceptions have been reported - 13,102,878,898 cycles # 2.866 GHz - 39,561,274,098 instructions # 3.02 insn per cycle - 4.573103208 seconds time elapsed + 13,090,805,200 cycles # 2.802 GHz + 39,558,366,602 instructions # 3.02 insn per cycle + 4.673224740 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
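The "Not found: ... build.hip_d_inl0_hrd0/check_hip.exe" lines above show the test driver skipping backends whose executables were never built on this CUDA node. The actual driver is a shell script, so the C++ guard below is purely illustrative of the pattern, with a hypothetical relative path:

#include <cstdio>
#include <filesystem> // C++17

int main()
{
  // Hypothetical path, mirroring the "Not found: ... check_hip.exe" lines.
  const std::filesystem::path exe{ "build.hip_d_inl0_hrd0/check_hip.exe" };
  if( !std::filesystem::exists( exe ) )
    std::printf( "Not found: %s\n", exe.c_str() ); // skip this backend
  else
    std::printf( "would run: %s\n", exe.c_str() );
  return 0;
}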
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.221553e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.238357e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.238357e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.039895e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.055822e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.055822e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.000959 sec +TOTAL : 2.046582 sec INFO: No Floating Point Exceptions have been reported - 5,606,842,172 cycles # 2.797 GHz - 13,823,630,038 instructions # 2.47 insn per cycle - 2.005081475 seconds time elapsed + 5,609,737,694 cycles # 2.737 GHz + 13,824,245,582 instructions # 2.46 insn per cycle + 2.050512234 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
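Each run also prints "MeanMatrixElemValue = ( mean +- spread ) GeV^-4". Assuming the +- term is the standard deviation of the per-event matrix elements (an assumption, not confirmed by these logs), the bookkeeping would look like this minimal sketch with hypothetical values:

#include <cmath>
#include <cstdio>
#include <vector>

int main()
{
  // Hypothetical per-event ME values; the real ones come from sigmaKin.
  const std::vector<double> me = { 3.1, 5.2, 4.0, 4.7 };
  double sum = 0, sum2 = 0;
  for( double x : me ) { sum += x; sum2 += x * x; }
  const double mean = sum / me.size();
  const double var = sum2 / me.size() - mean * mean; // population variance
  std::printf( "MeanMatrixElemValue = ( %e +- %e ) GeV^-4\n", mean, std::sqrt( var ) );
  return 0;
}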
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.323867e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.345458e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.345458e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.131967e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.152226e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.152226e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.765274 sec +TOTAL : 1.803136 sec INFO: No Floating Point Exceptions have been reported - 4,923,007,337 cycles # 2.783 GHz - 12,505,051,193 instructions # 2.54 insn per cycle - 1.769470001 seconds time elapsed + 4,925,220,417 cycles # 2.727 GHz + 12,506,075,504 instructions # 2.54 insn per cycle + 1.807026447 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 88) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.173442e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.186199e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.186199e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.955213e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.967580e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.967580e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.292842 sec +TOTAL : 2.365271 sec INFO: No Floating Point Exceptions have been reported - 4,144,135,456 cycles # 1.805 GHz - 6,391,162,760 instructions # 1.54 insn per cycle - 2.297102234 seconds time elapsed + 4,141,642,997 cycles # 1.749 GHz + 6,391,703,659 instructions # 1.54 insn per cycle + 2.369209251 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1975) (512y: 102) (512z: 9386) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt index 635f9015c3..794fb1a802 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd0_rmbhst.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,76 +11,76 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-01_23:49:56 +DATE: 2024-09-15_11:53:35 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst OMP= WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.081101e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.356424e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.358297e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.043181e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.332865e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.334802e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.517092 sec +TOTAL : 0.523228 sec INFO: No Floating Point Exceptions have been reported - 2,169,776,798 cycles # 2.903 GHz - 3,470,342,653 instructions # 1.60 insn per cycle - 0.807877251 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst + 2,169,607,107 cycles # 2.877 GHz + 3,459,237,306 instructions # 1.59 insn per cycle + 0.815202021 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst OMP= WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.731419e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.163821e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.165190e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.724697e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.162122e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.163349e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.207952 sec +TOTAL : 3.220306 sec INFO: No Floating Point Exceptions have been reported - 10,211,555,385 cycles # 2.948 GHz - 21,576,803,795 instructions # 2.11 insn per cycle - 3.520497556 seconds time elapsed + 10,133,189,238 cycles # 2.914 GHz + 23,148,993,968 instructions # 2.28 insn per cycle + 3.534065636 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -88,33 +88,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266731198158133E-004 Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.919767e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.920710e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.920710e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.879192e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.880098e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.880098e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.548661 sec +TOTAL : 8.734016 sec INFO: No Floating Point Exceptions have been reported - 25,650,941,063 cycles # 3.000 GHz - 78,955,343,064 instructions # 3.08 insn per cycle - 8.552742436 seconds time elapsed + 25,661,507,280 cycles # 2.937 GHz + 78,953,590,713 instructions # 3.08 insn per cycle + 8.737772518 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 4842) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -122,31 +122,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.513813e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.516942e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.516942e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.527947e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.531121e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.531121e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.673624 sec +TOTAL : 4.656259 sec INFO: No Floating Point Exceptions have been reported - 13,066,932,507 cycles # 2.794 GHz - 39,559,326,469 instructions # 3.03 insn per cycle - 4.677800727 seconds time elapsed + 13,056,695,540 cycles # 2.803 GHz + 39,560,471,761 instructions # 3.03 insn per cycle + 4.660116201 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13192) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -154,31 +154,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.202148e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.218438e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.218438e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.991321e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.007389e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.007389e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.005511 sec +TOTAL : 2.059547 sec INFO: No Floating Point Exceptions have been reported - 5,606,683,157 cycles # 2.791 GHz - 13,823,530,699 instructions # 2.47 insn per cycle - 2.009685853 seconds time elapsed + 5,612,692,411 cycles # 2.721 GHz + 13,825,461,651 instructions # 2.46 insn per cycle + 2.063599543 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11520) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -186,31 +186,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.377013e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.399305e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.399305e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.168393e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.189788e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.189788e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.755095 sec +TOTAL : 1.795888 sec INFO: No Floating Point Exceptions have been reported - 4,921,086,197 cycles # 2.798 GHz - 12,505,079,449 instructions # 2.54 insn per cycle - 1.759276338 seconds time elapsed + 4,923,088,043 cycles # 2.737 GHz + 12,506,721,234 instructions # 2.54 insn per cycle + 1.799844991 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10439) (512y: 88) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -218,31 +218,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.166192e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.180300e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.180300e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.949785e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.962226e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.962226e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.295174 sec +TOTAL : 2.367179 sec INFO: No Floating Point Exceptions have been reported - 4,143,137,312 cycles # 1.802 GHz - 6,391,191,490 instructions # 1.54 insn per cycle - 2.299503002 seconds time elapsed + 4,144,948,568 cycles # 1.750 GHz + 6,391,796,529 instructions # 1.54 insn per cycle + 2.371216811 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1975) (512y: 102) (512z: 9386) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -250,8 +250,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt index 8702657ea6..80da0089a3 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-01_23:19:20 +DATE: 2024-09-15_11:16:25 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.329626e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.352172e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.354025e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.297695e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.326014e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.328135e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.528976 sec +TOTAL : 0.536243 sec INFO: No Floating Point Exceptions have been reported - 2,235,005,557 cycles # 2.928 GHz - 3,523,767,617 instructions # 1.58 insn per cycle - 0.822200243 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 + 2,198,395,650 cycles # 2.855 GHz + 
3,395,768,128 instructions # 1.54 insn per cycle + 0.828484590 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.150039e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.177704e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.178867e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.139738e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.170223e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.171507e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.028533 sec +TOTAL : 3.043962 sec INFO: No Floating Point Exceptions have been reported - 9,771,225,354 cycles # 2.968 GHz - 20,608,619,362 instructions # 2.11 insn per cycle - 3.348285124 seconds time elapsed + 9,646,114,898 cycles # 2.920 GHz + 22,170,499,370 instructions # 2.30 insn per cycle + 3.361578134 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266731198158133E-004 Relative difference = 2.837296512218831e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.914314e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.915252e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.915252e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.884927e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.885834e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.885834e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.573222 sec +TOTAL : 8.707889 sec INFO: No Floating Point Exceptions have been reported - 25,658,733,071 cycles # 2.992 GHz - 78,700,903,284 instructions # 3.07 insn per cycle - 8.577401591 seconds time elapsed + 25,619,332,595 cycles # 2.941 GHz + 78,702,929,908 instructions # 3.07 insn per cycle + 8.712420077 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 4191) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141133E-004 Relative difference = 2.8372990776517314e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.626644e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.629945e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.629945e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.574307e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.577560e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.577560e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.528164 sec +TOTAL : 4.595188 sec INFO: No Floating Point Exceptions have been reported - 13,032,300,901 cycles # 2.876 GHz - 39,448,920,452 instructions # 3.03 insn per cycle - 4.532355358 seconds time elapsed + 13,048,399,086 cycles # 2.838 GHz + 39,450,691,251 instructions # 3.02 insn per cycle + 4.599210719 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:12966) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141122E-004 Relative difference = 2.837299079287849e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.099683e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.115929e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.115929e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.930739e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.946422e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.946422e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.030785 sec +TOTAL : 2.074898 sec INFO: No Floating Point Exceptions have been reported - 5,686,751,600 cycles # 2.796 GHz - 13,911,142,235 instructions # 2.45 insn per cycle - 2.034951048 seconds time elapsed + 5,675,338,380 cycles # 2.732 GHz + 13,910,840,784 instructions # 2.45 insn per cycle + 2.079006346 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11582) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.268666e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.290003e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.290003e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.062486e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.083766e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.083766e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.775645 sec +TOTAL : 1.816561 sec INFO: No Floating Point Exceptions have been reported - 4,993,199,393 cycles # 2.807 GHz - 12,602,433,108 instructions # 2.52 insn per cycle - 1.779957429 seconds time elapsed + 4,996,440,015 cycles # 2.746 GHz + 12,603,390,155 instructions # 2.52 insn per cycle + 1.820566072 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10423) (512y: 240) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.206059e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.219386e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.219386e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.965367e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.977715e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.977715e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.282127 sec +TOTAL : 2.361480 sec INFO: No Floating Point Exceptions have been reported - 4,159,427,245 cycles # 1.820 GHz - 6,499,223,807 instructions # 1.56 insn per cycle - 2.286309091 seconds time elapsed + 4,159,091,159 cycles # 1.759 GHz + 6,499,576,244 instructions # 1.56 insn per cycle + 2.365402468 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1751) (512y: 194) (512z: 9382) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198157320E-004 Relative difference = 2.837296634927675e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt index 96ffb76cf9..a149b91e1f 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-01_23:35:14 +DATE: 2024-09-15_11:36:57 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.097238e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.116880e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.118273e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.106076e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.131267e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.133080e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.537823 sec +TOTAL : 0.544695 sec INFO: No Floating Point Exceptions have been reported - 2,250,719,878 cycles # 2.921 GHz - 3,578,968,848 instructions # 1.59 insn per cycle - 0.827505559 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1 + 2,219,428,600 cycles # 2.858 GHz + 
3,493,527,234 instructions # 1.57 insn per cycle + 0.834499500 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.751486e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.775770e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.776803e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.753348e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.778591e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.779655e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.302670 sec +TOTAL : 3.312449 sec INFO: No Floating Point Exceptions have been reported - 10,538,630,307 cycles # 2.961 GHz - 24,318,411,648 instructions # 2.31 insn per cycle - 3.614850320 seconds time elapsed + 10,421,899,087 cycles # 2.913 GHz + 24,058,421,553 instructions # 2.31 insn per cycle + 3.636091022 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
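[Editorial note] A back-of-envelope check on the "-p 2048 256 1" CUDA run above: the grid corresponds to 2048 x 256 x 1 = 524288 events, so the quoted EvtsPerSec[MatrixElems] of ~3.78e5/s implies only ~1.4 s spent in the matrix-element kernel, while TOTAL is ~3.3 s. A minimal sketch, assuming the EvtsPerSec figures divide the event count by an internal per-section timer rather than by the TOTAL wall time (which also covers setup, sampling and transfers):

  nevt = 2048 * 256 * 1          # grid from "-p 2048 256 1" -> 524288 events
  rate_me = 3.778591e+05         # EvtsPerSec[MatrixElems] (3), from the log above
  total = 3.312449               # TOTAL wall time in seconds, from the log above
  print(f"implied ME-only time: {nevt / rate_me:.2f} s")   # ~1.39 s
  print(f"TOTAL wall time     : {total:.2f} s")            # remainder is non-ME work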
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266731198158122E-004 Relative difference = 2.837296513854949e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 4.356633e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.357117e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.357117e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.268976e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.269450e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.269450e+02 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 37.650558 sec +TOTAL : 38.423779 sec INFO: No Floating Point Exceptions have been reported - 112,738,803,262 cycles # 2.994 GHz - 144,792,925,936 instructions # 1.28 insn per cycle - 37.654827076 seconds time elapsed + 112,730,268,623 cycles # 2.934 GHz + 144,772,135,406 instructions # 1.28 insn per cycle + 38.427951659 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:21273) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198140461E-004 Relative difference = 2.8372991790910424e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.143637e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.146289e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.146289e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.077363e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.079742e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.079742e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 5.223092 sec +TOTAL : 5.335511 sec INFO: No Floating Point Exceptions have been reported - 14,749,878,339 cycles # 2.822 GHz - 37,648,636,162 instructions # 2.55 insn per cycle - 5.227369590 seconds time elapsed + 14,752,370,812 cycles # 2.763 GHz + 37,645,694,563 instructions # 2.55 insn per cycle + 5.339828429 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:68253) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
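[Editorial note] The "Relative difference" lines above can be reproduced directly from the two printed averages. A minimal sketch, assuming the check divides |cpp - f77| by the C++ value (the log only shows the result and the 5E-3 tolerance):

  cpp = 6.626675e-04               # Avg ME (C++/C++), from the log above
  f77 = 6.6266731198140461e-04     # Avg ME (F77/C++), from the log above
  rel = abs(cpp - f77) / abs(cpp)
  print(f"Relative difference = {rel:e}")   # ~2.8372991790910e-07, as printed
  assert rel <= 5e-3                        # "OK (relative difference <= 5E-3)"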
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141209E-004 Relative difference = 2.8372990661989057e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.539545e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.554230e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.554230e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.332306e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.345602e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.345602e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.181602 sec +TOTAL : 2.243210 sec INFO: No Floating Point Exceptions have been reported - 6,123,137,248 cycles # 2.802 GHz - 13,060,513,042 instructions # 2.13 insn per cycle - 2.185812086 seconds time elapsed + 6,130,370,628 cycles # 2.729 GHz + 13,060,931,234 instructions # 2.13 insn per cycle + 2.247428561 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:46973) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
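[Editorial note] The derived figures in the counter lines follow from the raw counts: GHz is cycles over CPU time, and "insn per cycle" is instructions over cycles. A quick check on the avx2_d_inl1_hrd0 run above, assuming perf divides by task-clock (CPU time), which for this single-threaded run sits marginally below the elapsed time shown:

  cycles       = 6_130_370_628     # avx2_d_inl1_hrd0 counters, from the log above
  instructions = 13_060_931_234
  elapsed      = 2.247428561       # "seconds time elapsed"
  print(f"~{cycles / elapsed / 1e9:.3f} GHz")            # ~2.728 (log: 2.729)
  print(f"{instructions / cycles:.2f} insn per cycle")   # 2.13, as printed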
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.142318e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.163428e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.163428e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.812729e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.832105e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.832105e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.800190 sec +TOTAL : 1.867280 sec INFO: No Floating Point Exceptions have been reported - 5,062,465,914 cycles # 2.807 GHz - 11,454,182,692 instructions # 2.26 insn per cycle - 1.804398301 seconds time elapsed + 5,063,580,201 cycles # 2.707 GHz + 11,453,397,200 instructions # 2.26 insn per cycle + 1.871531437 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:40498) (512y: 285) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.490777e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.504795e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.504795e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.266726e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.280024e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.280024e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.195736 sec +TOTAL : 2.263638 sec INFO: No Floating Point Exceptions have been reported - 3,951,905,441 cycles # 1.797 GHz - 5,928,043,150 instructions # 1.50 insn per cycle - 2.199938848 seconds time elapsed + 3,957,788,966 cycles # 1.746 GHz + 5,926,468,977 instructions # 1.50 insn per cycle + 2.267826067 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2444) (512y: 337) (512z:39349) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
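[Editorial note] Taking the five double-precision inl1_hrd0 host runs above together, the EvtsPerSec[MECalcOnly] ratios against the scalar build come out at roughly 7x (sse4), 17x (avx2), 21x (512y) and 17x (512z). These exceed the nominal 2/4/4/8 double-precision vector widths, presumably because the inlineHel=1 scalar build is itself unusually slow here (TOTAL ~38 s), so they should not be read as pure SIMD gains. The arithmetic, with the rates copied from the log:

  rates = {                        # backend: EvtsPerSec[MECalcOnly], from the log
      "none": 4.269450e+02, "sse4": 3.079742e+03, "avx2": 7.345602e+03,
      "512y": 8.832105e+03, "512z": 7.280024e+03,
  }
  for backend, rate in rates.items():
      print(f"{backend}: {rate / rates['none']:5.2f}x vs scalar")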
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt index ef5b0520d3..c0add05aa1 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_d_inl1_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-01_23:36:22 +DATE: 2024-09-15_11:38:07 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.106464e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.130019e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.131660e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.096121e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.123086e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.125058e+05 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 0.536986 sec +TOTAL : 0.541972 sec INFO: No Floating Point Exceptions have been reported - 2,245,613,083 cycles # 2.928 GHz - 3,531,927,717 instructions # 1.57 insn per cycle - 0.825355433 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1 + 2,219,963,696 cycles # 2.870 GHz + 
3,470,909,979 instructions # 1.56 insn per cycle + 0.830712751 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.743190e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.767517e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.768547e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.756387e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.782287e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.783295e+05 ) sec^-1 MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4 -TOTAL : 3.299173 sec +TOTAL : 3.298693 sec INFO: No Floating Point Exceptions have been reported - 10,563,427,043 cycles # 2.972 GHz - 24,228,869,138 instructions # 2.29 insn per cycle - 3.612557038 seconds time elapsed + 10,399,316,447 cycles # 2.922 GHz + 23,584,057,660 instructions # 2.27 insn per cycle + 3.614068267 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_d_inl1_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266731198158122E-004 Relative difference = 2.837296513854949e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_d_inl1_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 4.301726e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.302192e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.302192e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.224460e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.224899e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.224899e+02 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 38.130581 sec +TOTAL : 38.827526 sec INFO: No Floating Point Exceptions have been reported - 113,774,788,028 cycles # 2.984 GHz - 144,279,987,880 instructions # 1.27 insn per cycle - 38.134610815 seconds time elapsed + 113,783,414,735 cycles # 2.930 GHz + 144,278,309,276 instructions # 1.27 insn per cycle + 38.831628591 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:21024) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198140450E-004 Relative difference = 2.83729918072716e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.058351e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.060826e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.060826e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.989108e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.991357e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.991357e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 5.368552 sec +TOTAL : 5.492626 sec INFO: No Floating Point Exceptions have been reported - 15,303,684,701 cycles # 2.849 GHz - 38,390,561,420 instructions # 2.51 insn per cycle - 5.372764017 seconds time elapsed + 15,275,599,565 cycles # 2.780 GHz + 38,389,599,156 instructions # 2.51 insn per cycle + 5.496788286 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:69643) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198141209E-004 Relative difference = 2.8372990661989057e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.700721e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.715343e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.715343e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.497881e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.512338e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.512338e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.135636 sec +TOTAL : 2.193501 sec INFO: No Floating Point Exceptions have been reported - 6,011,281,562 cycles # 2.810 GHz - 12,934,915,193 instructions # 2.15 insn per cycle - 2.139765113 seconds time elapsed + 6,019,122,923 cycles # 2.740 GHz + 12,933,620,431 instructions # 2.15 insn per cycle + 2.197765722 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:46099) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.081941e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.102133e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.102133e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.839318e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.859960e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.859960e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.811792 sec +TOTAL : 1.861821 sec INFO: No Floating Point Exceptions have been reported - 5,089,368,793 cycles # 2.804 GHz - 11,449,399,387 instructions # 2.25 insn per cycle - 1.815957052 seconds time elapsed + 5,093,783,286 cycles # 2.731 GHz + 11,449,481,812 instructions # 2.25 insn per cycle + 1.866150033 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:40142) (512y: 219) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.589010e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.603957e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.603957e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.279822e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.293417e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.293417e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.167801 sec +TOTAL : 2.259516 sec INFO: No Floating Point Exceptions have been reported - 3,945,734,127 cycles # 1.817 GHz - 5,888,838,565 instructions # 1.49 insn per cycle - 2.172122091 seconds time elapsed + 3,958,337,222 cycles # 1.750 GHz + 5,889,113,860 instructions # 1.49 insn per cycle + 2.263750575 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1959) (512y: 259) (512z:38927) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
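[Editorial note] The 512z build above retires noticeably fewer cycles than the 512y build yet ends up slower, because the measured clock drops from ~2.73 to ~1.75 GHz; this is plausibly AVX-512 frequency reduction on this Xeon Silver host, although the log itself only reports the GHz figures. The numbers, copied from the two runs above:

  y_cycles, y_ghz = 5_093_783_286, 2.731     # 512y_d_inl1_hrd1 counters
  z_cycles, z_ghz = 3_958_337_222, 1.750     # 512z_d_inl1_hrd1 counters
  print(f"cycle ratio z/y: {z_cycles / y_cycles:.2f}")   # ~0.78: fewer cycles
  print(f"clock ratio z/y: {z_ghz / y_ghz:.2f}")         # ~0.64: much lower clock
  print(f"time  ratio z/y: {(z_cycles / z_ghz) / (y_cycles / y_ghz):.2f}")  # ~1.21: net slower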
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_d_inl1_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731198156789E-004 Relative difference = 2.837296715097453e-07 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt index a6de8a92c1..f53bdfcb06 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-01_23:19:53 +DATE: 2024-09-15_11:16:59 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.525526e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.560984e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.564965e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.467249e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.509285e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.513718e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.492424 sec +TOTAL : 0.494218 sec INFO: No Floating Point Exceptions have been reported - 2,001,879,633 cycles # 2.819 GHz - 3,019,819,984 instructions # 1.51 insn per cycle - 0.769396542 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 + 2,034,103,432 cycles # 2.860 GHz + 
3,045,186,386 instructions # 1.50 insn per cycle + 0.768364063 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.137089e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.191445e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.193924e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.128844e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.190571e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.193248e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.794834 sec +TOTAL : 1.796967 sec INFO: No Floating Point Exceptions have been reported - 5,999,042,027 cycles # 2.947 GHz - 12,933,175,351 instructions # 2.16 insn per cycle - 2.095708185 seconds time elapsed + 5,948,917,067 cycles # 2.924 GHz + 12,254,957,631 instructions # 2.06 insn per cycle + 2.089755272 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626454e-04 Avg ME (F77/GPU) = 6.6262667672387088E-004 Relative difference = 2.825534762507892e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.962685e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.963647e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.963647e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.939424e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.940380e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.940380e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.361298 sec +TOTAL : 8.461205 sec INFO: No Floating Point Exceptions have been reported - 24,937,506,812 cycles # 2.982 GHz - 79,113,828,402 instructions # 3.17 insn per cycle - 8.365325330 seconds time elapsed + 24,939,277,475 cycles # 2.947 GHz + 79,109,068,255 instructions # 3.17 insn per cycle + 8.465315543 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274863312764526E-004 Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.116873e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.129943e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.129943e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.989306e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.001573e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.001573e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.309301 sec +TOTAL : 2.351266 sec INFO: No Floating Point Exceptions have been reported - 6,531,767,755 cycles # 2.824 GHz - 20,271,250,387 instructions # 3.10 insn per cycle - 2.313702533 seconds time elapsed + 6,525,064,847 cycles # 2.771 GHz + 20,269,487,959 instructions # 3.11 insn per cycle + 2.355049106 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861442972011E-004 Relative difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.630164e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.636943e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.636943e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.582613e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.589051e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.589051e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.011713 sec +TOTAL : 1.041194 sec INFO: No Floating Point Exceptions have been reported - 2,849,688,057 cycles # 2.807 GHz - 7,066,344,339 instructions # 2.48 insn per cycle - 1.015815872 seconds time elapsed + 2,848,829,047 cycles # 2.729 GHz + 7,065,493,216 instructions # 2.48 insn per cycle + 1.044894531 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174396888E-004 Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.849480e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.858102e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.858102e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.794003e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.802231e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.802231e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.891670 sec +TOTAL : 0.919231 sec INFO: No Floating Point Exceptions have been reported - 2,522,839,359 cycles # 2.818 GHz - 6,403,669,992 instructions # 2.54 insn per cycle - 0.895820914 seconds time elapsed + 2,522,001,135 cycles # 2.735 GHz + 6,403,495,458 instructions # 2.54 insn per cycle + 0.923373159 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11021) (512y: 43) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174396888E-004 Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.457100e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.462451e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.462451e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.403418e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.408437e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.408437e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.130822 sec +TOTAL : 1.173408 sec INFO: No Floating Point Exceptions have been reported - 2,069,347,920 cycles # 1.824 GHz - 3,303,789,535 instructions # 1.60 insn per cycle - 1.135015326 seconds time elapsed + 2,065,585,282 cycles # 1.756 GHz + 3,303,212,083 instructions # 1.60 insn per cycle + 1.177101647 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2601) (512y: 46) (512z: 9605) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952779718007E-004 Relative difference = 4.194411063934945e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt index b61796eb51..99ccf0b7c6 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_bridge.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,40 +11,40 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-01_23:45:16 +DATE: 2024-09-15_11:48:48 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -53,17 +53,17 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.950659e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.505059e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.505059e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.945945e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.468849e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.468849e+05 ) sec^-1 MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.476846 sec +TOTAL : 0.482042 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 2,018,132,291 cycles # 2.915 GHz - 3,077,441,374 instructions # 1.52 insn per cycle - 0.749610068 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge + 1,997,062,353 cycles # 2.864 GHz + 3,031,546,242 instructions # 1.52 insn per cycle + 0.755384112 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) @@ -71,7 +71,7 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -80,18 +80,18 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.950025e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.080536e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.080536e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.954589e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.017029e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.017029e+05 ) sec^-1 MeanMatrixElemValue = ( 6.641709e+00 +- 4.994248e+00 ) GeV^-4 -TOTAL : 1.969137 sec +TOTAL : 2.150287 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 6,563,176,344 cycles # 2.957 GHz - 14,010,044,711 instructions # 2.13 insn per cycle - 2.277055445 seconds time elapsed + 6,911,046,669 cycles # 2.894 GHz + 9,720,301,924 instructions # 1.41 insn per cycle + 2.447262326 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -99,35 +99,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626454e-04 Avg ME (F77/GPU) = 6.6262667672387088E-004 Relative difference = 2.825534762507892e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: 
The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.975190e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.976183e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.976183e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.933142e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.934088e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.934088e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.310516 sec +TOTAL : 8.491300 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 24,940,948,507 cycles # 3.000 GHz - 79,113,943,646 instructions # 3.17 insn per cycle - 8.314719259 seconds time elapsed + 24,914,436,852 cycles # 2.933 GHz + 79,112,976,787 instructions # 3.18 insn per cycle + 8.495346137 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -135,33 +135,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274863312764526E-004 Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.173055e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.186375e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.186375e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.985457e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.998623e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.998623e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.293926 sec +TOTAL : 2.355396 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 6,540,473,256 cycles # 2.847 GHz - 20,280,249,725 instructions # 3.10 insn per cycle - 2.298218918 seconds time elapsed + 6,536,388,186 cycles # 2.771 GHz + 20,278,657,318 instructions # 3.10 insn per cycle + 2.359340287 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -169,33 +169,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861442972011E-004 Relative difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.621304e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.628040e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.628040e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.588469e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.595113e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.595113e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.019285 sec +TOTAL : 1.040170 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 2,858,228,957 cycles # 2.794 GHz - 7,075,917,816 instructions # 2.48 insn per cycle - 1.023527550 seconds time elapsed + 2,854,653,705 cycles # 2.736 GHz + 7,075,192,119 instructions # 2.48 insn per cycle + 1.044116961 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -203,33 +203,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174396888E-004 Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.847241e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.855664e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.855664e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.764583e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.772721e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.772721e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.895280 sec +TOTAL : 0.937436 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 2,529,427,682 cycles # 2.814 GHz - 6,413,812,862 instructions # 2.54 insn per cycle - 0.899477275 seconds time elapsed + 2,529,004,808 cycles # 2.688 GHz + 6,413,196,189 instructions # 2.54 insn per cycle + 0.941494819 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11021) (512y: 43) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -237,33 +237,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174396888E-004 Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.472092e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.477566e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.477566e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.393518e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.398724e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.398724e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.122188 sec +TOTAL : 1.184860 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 2,077,583,112 cycles # 1.845 GHz - 3,314,052,942 instructions # 1.60 insn per cycle - 1.126483602 seconds time elapsed + 2,077,752,907 cycles # 1.749 GHz + 3,313,647,639 instructions # 1.59 insn per cycle + 1.188846310 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2601) (512y: 46) (512z: 9605) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -271,8 +271,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952779718007E-004 Relative difference = 4.194411063934945e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt index 1b7d8e33ca..19f64c3e7a 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_common.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in 
BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-01_23:56:01 +DATE: 2024-09-15_11:59:51 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.508785e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.546625e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.550440e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.517186e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.553165e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.556664e+05 ) sec^-1 MeanMatrixElemValue = ( 4.159396e-01 +- 3.238803e-01 ) GeV^-4 -TOTAL : 0.473604 sec +TOTAL : 0.477606 sec INFO: No Floating Point Exceptions have been reported - 2,017,340,152 cycles # 2.933 GHz - 3,080,271,867 instructions # 1.53 insn per cycle - 0.745522319 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --common + 1,989,569,741 cycles # 2.873 GHz + 3,005,042,417 instructions # 1.51 insn per cycle + 0.749585148 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --common ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:COMMON+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.132695e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.194961e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.197817e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.132207e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.190283e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.192869e+05 ) sec^-1 MeanMatrixElemValue = ( 1.094367e+02 +- 1.071509e+02 ) GeV^-4 -TOTAL : 1.870998 sec +TOTAL : 1.892599 sec INFO: No Floating Point Exceptions have been reported - 6,188,150,786 cycles # 2.945 GHz - 12,176,753,673 instructions # 1.97 insn per cycle - 2.156923874 seconds time elapsed + 6,180,255,032 cycles # 2.913 GHz + 13,158,154,431 instructions # 2.13 insn per cycle + 2.179693271 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626454e-04 Avg ME (F77/GPU) = 6.6262667672387088E-004 Relative difference = 2.825534762507892e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.971622e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.972577e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.972577e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.919400e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.920348e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.920348e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208459e-01 +- 3.253446e-01 ) GeV^-4 -TOTAL : 8.324572 sec +TOTAL : 8.550896 sec INFO: No Floating Point Exceptions have been reported - 24,943,820,538 cycles # 2.996 GHz - 79,110,662,739 instructions # 3.17 insn per cycle - 8.328497474 seconds time elapsed + 24,917,761,266 cycles # 2.921 GHz + 79,107,928,249 instructions # 3.17 insn per cycle + 8.554412617 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating 
Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274863312764526E-004 Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.128415e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.141771e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.141771e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.947395e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.959971e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.959971e+03 ) sec^-1 MeanMatrixElemValue = ( 4.208457e-01 +- 3.253445e-01 ) GeV^-4 -TOTAL : 2.307045 sec +TOTAL : 2.367170 sec INFO: No Floating Point Exceptions have been reported - 6,543,184,083 cycles # 2.833 GHz - 20,272,761,591 instructions # 3.10 insn per cycle - 2.311035863 seconds time elapsed + 6,537,639,637 cycles # 2.759 GHz + 20,270,199,231 instructions # 3.10 insn per cycle + 2.370801699 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
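Each cmpExe pair above ends with the same tolerance check on the two average matrix elements. A minimal sketch of that check, assuming "Relative difference" is the plain |a-b|/|a| ratio (an assumption, though it reproduces the 4.998523613136231e-08 printed above):

  #include <cmath>
  #include <cstdio>

  int main()
  {
    // Values copied from the none_f cmpExe block above (float precision)
    const double avgMeCpp = 6.627486e-04;            // Avg ME (C++/C++)
    const double avgMeF77 = 6.6274863312764526E-004; // Avg ME (F77/C++)
    // Assumed definition of "Relative difference"; it matches the log output
    const double relDiff = std::fabs( avgMeF77 - avgMeCpp ) / std::fabs( avgMeCpp );
    std::printf( "Relative difference = %.15e\n", relDiff );
    std::printf( relDiff <= 5E-3 ? "OK (relative difference <= 5E-3)\n"
                                 : "ERROR (relative difference > 5E-3)\n" );
    return 0;
  }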
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861442972011E-004 Relative difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.627514e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.634489e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.634489e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.589719e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.596352e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.596352e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 1.013506 sec +TOTAL : 1.038242 sec INFO: No Floating Point Exceptions have been reported - 2,853,225,522 cycles # 2.806 GHz - 7,063,897,484 instructions # 2.48 insn per cycle - 1.017452145 seconds time elapsed + 2,854,773,942 cycles # 2.742 GHz + 7,065,309,093 instructions # 2.47 insn per cycle + 1.041774606 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174396888E-004 Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.742068e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.749981e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.749981e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.793297e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.801693e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.801693e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214978e-01 +- 3.255521e-01 ) GeV^-4 -TOTAL : 0.947580 sec +TOTAL : 0.921472 sec INFO: No Floating Point Exceptions have been reported - 2,521,098,765 cycles # 2.651 GHz - 6,400,414,638 instructions # 2.54 insn per cycle - 0.951668346 seconds time elapsed + 2,523,779,273 cycles # 2.730 GHz + 6,401,399,707 instructions # 2.54 insn per cycle + 0.925110369 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11021) (512y: 43) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174396888E-004 Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --common OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:COMMON+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.444514e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.449851e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.449851e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.398357e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.403401e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.403401e+04 ) sec^-1 MeanMatrixElemValue = ( 4.214981e-01 +- 3.255523e-01 ) GeV^-4 -TOTAL : 1.141729 sec +TOTAL : 1.179617 sec INFO: No Floating Point Exceptions have been reported - 2,071,860,072 cycles # 1.809 GHz - 3,300,448,625 instructions # 1.59 insn per cycle - 1.145827411 seconds time elapsed + 2,071,965,297 cycles # 1.751 GHz + 3,301,502,867 instructions # 1.59 insn per cycle + 1.184374263 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2601) (512y: 46) (512z: 9605) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
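The "Internal loops fptype_sv = VECTOR[N]" lines across the none/sse4/avx2/512y/512z runs above follow simple arithmetic: in these FLOAT builds the vector width is the SIMD register width divided by the 32-bit element size, so 512y packs 8 floats into 256-bit registers while 512z packs 16 into 512-bit ones. A small sketch of that relation (illustrative only):

  #include <cstdio>

  // Elements per SIMD register = register bits / (8 * bytes per element)
  constexpr int simdWidth( int registerBits, int elementBytes )
  {
    return registerBits / ( 8 * elementBytes );
  }

  int main()
  {
    const int fltBytes = (int)sizeof( float ); // 4 bytes in FLOAT builds
    std::printf( "sse4      (128bit): VECTOR[%d]\n", simdWidth( 128, fltBytes ) ); // 4
    std::printf( "avx2/512y (256bit): VECTOR[%d]\n", simdWidth( 256, fltBytes ) ); // 8
    std::printf( "512z      (512bit): VECTOR[%d]\n", simdWidth( 512, fltBytes ) ); // 16
    return 0;
  }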
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952779718007E-004 Relative difference = 4.194411063934945e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt index 5594b3833c..71166778fc 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_curhst.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-01_23:53:13 +DATE: 2024-09-15_11:56:57 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.515702e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.552924e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.556552e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.530906e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.566446e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.569964e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.474495 sec +TOTAL : 0.476038 sec INFO: No Floating Point Exceptions have been reported - 2,011,973,393 cycles # 2.920 GHz - 3,012,287,196 instructions # 1.50 insn per cycle - 0.746226316 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst + 1,985,362,661 
cycles # 2.871 GHz + 2,993,731,363 instructions # 1.51 insn per cycle + 0.748126319 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --curhst ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.157102e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.219957e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.222675e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.141583e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.198514e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.201079e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.819017 sec +TOTAL : 1.826652 sec INFO: No Floating Point Exceptions have been reported - 6,066,507,433 cycles # 2.960 GHz - 12,322,393,310 instructions # 2.03 insn per cycle - 2.106524554 seconds time elapsed + 5,952,227,239 cycles # 2.897 GHz + 12,995,171,990 instructions # 2.18 insn per cycle + 2.112711437 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
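The throughput counters above are rates over individual timed sections, not over TOTAL. Assuming the "-p 2048 256 1" arguments mean 2048 blocks x 256 threads x 1 iteration (524288 events), the ~8.2e5 sec^-1 EvtsPerSec[MatrixElems] figure implies an ME-only section time of roughly 0.64 s, well below the ~1.83 s TOTAL. A sketch of that arithmetic (both the "-p" interpretation and the 0.64 s figure are inferences, not log output):

  #include <cstdio>

  int main()
  {
    // Assumed meaning of "-p 2048 256 1": blocks x threads x iterations
    const long nevents = 2048L * 256L * 1L;   // 524288 events
    const double meSeconds = 0.64;    // inferred ME-section time, not from the log
    const double totalSeconds = 1.83; // TOTAL from the log, includes non-ME work
    std::printf( "EvtsPerSec[MatrixElems] ~ %e sec^-1\n", nevents / meSeconds );
    std::printf( "nevents/TOTAL           ~ %e sec^-1 (much lower)\n", nevents / totalSeconds );
    return 0;
  }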
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626454e-04 Avg ME (F77/GPU) = 6.6262667672387088E-004 Relative difference = 2.825534762507892e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.960123e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.961113e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.961113e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.933291e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.934237e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.934237e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.372365 sec +TOTAL : 8.487946 sec INFO: No Floating Point Exceptions have been reported - 24,913,204,556 cycles # 2.975 GHz - 79,109,275,563 instructions # 3.18 insn per cycle - 8.376463077 seconds time elapsed + 24,924,562,928 cycles # 2.936 GHz + 79,109,269,886 instructions # 3.17 insn per cycle + 8.491506444 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating 
Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274863312764526E-004 Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.100093e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.113139e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.113139e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.966222e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.978443e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.978443e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.314710 sec +TOTAL : 2.359225 sec INFO: No Floating Point Exceptions have been reported - 6,539,620,343 cycles # 2.821 GHz - 20,270,856,622 instructions # 3.10 insn per cycle - 2.318690633 seconds time elapsed + 6,526,176,570 cycles # 2.763 GHz + 20,269,541,830 instructions # 3.11 insn per cycle + 2.362842375 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861442972011E-004 Relative difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.634403e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.641056e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.641056e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.543496e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.549592e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.549592e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.008306 sec +TOTAL : 1.067538 sec INFO: No Floating Point Exceptions have been reported - 2,848,061,854 cycles # 2.816 GHz - 7,066,418,028 instructions # 2.48 insn per cycle - 1.012267305 seconds time elapsed + 2,860,867,267 cycles # 2.672 GHz + 7,065,461,760 instructions # 2.47 insn per cycle + 1.071393478 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174396888E-004 Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.832427e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.840853e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.840853e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.800483e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.808998e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.808998e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.899982 sec +TOTAL : 0.915759 sec INFO: No Floating Point Exceptions have been reported - 2,519,286,089 cycles # 2.789 GHz - 6,403,779,811 instructions # 2.54 insn per cycle - 0.904006492 seconds time elapsed + 2,516,887,683 cycles # 2.739 GHz + 6,403,177,488 instructions # 2.54 insn per cycle + 0.919482910 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11021) (512y: 43) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174396888E-004 Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --curhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.443265e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.448590e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.448590e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.400235e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.405247e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.405247e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.141808 sec +TOTAL : 1.176437 sec INFO: No Floating Point Exceptions have been reported - 2,068,467,128 cycles # 1.806 GHz - 3,304,001,462 instructions # 1.60 insn per cycle - 1.145999074 seconds time elapsed + 2,065,991,158 cycles # 1.752 GHz + 3,303,729,120 instructions # 1.60 insn per cycle + 1.180108788 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2601) (512y: 46) (512z: 9605) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
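A note on the recurring "MEK (channelid array)" debug lines: 512 test events are reported across 123 available channels, but the printed counts show 16 contiguous groups of 32 events tagged as channels 2 through 17. One plausible reading is that events are assigned to channels in blocks equal to the SIMD/vector event group size; the sketch below reproduces the printed counts under that assumption and is an inference, not code from the repository:

  #include <cstdio>
  #include <map>

  int main()
  {
    const int nevents = 512, groupSize = 32, firstChannel = 2; // from the debug line
    std::map<int, int> counts; // channel id -> number of events
    for( int ievt = 0; ievt < nevents; ievt++ )
      counts[firstChannel + ievt / groupSize]++; // contiguous blocks of 32
    for( const auto& [channel, n] : counts )
      std::printf( "%d : %d\n", channel, n ); // prints "2 : 32" ... "17 : 32"
    return 0;
  }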
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952779718007E-004 Relative difference = 4.194411063934945e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt index 2c8782ca43..dd6ac10521 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd0_rmbhst.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,76 +11,76 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-01_23:50:29 +DATE: 2024-09-15_11:54:10 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst OMP= WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.073163e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.551560e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.555538e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.065242e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.543001e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.546426e+05 ) sec^-1 MeanMatrixElemValue = ( 4.048178e+00 +- 2.364571e+00 ) GeV^-4 -TOTAL : 0.478101 sec +TOTAL : 0.481262 sec INFO: No Floating Point Exceptions have been reported - 2,015,805,791 cycles # 2.914 GHz - 3,037,316,901 instructions # 1.51 insn per cycle - 0.749509646 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst + 1,996,897,335 cycles # 2.872 GHz + 2,970,634,149 instructions # 1.49 insn per cycle + 0.754034663 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --rmbhst WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --rmbhst OMP= WARNING! 
RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.119461e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.190458e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.193209e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.141287e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.200996e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.203446e+05 ) sec^-1 MeanMatrixElemValue = ( 6.641709e+00 +- 4.994248e+00 ) GeV^-4 -TOTAL : 1.896617 sec +TOTAL : 1.902964 sec INFO: No Floating Point Exceptions have been reported - 6,339,268,119 cycles # 2.968 GHz - 13,650,810,628 instructions # 2.15 insn per cycle - 2.193112785 seconds time elapsed + 6,190,673,302 cycles # 2.901 GHz + 13,306,269,630 instructions # 2.15 insn per cycle + 2.189368892 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -88,33 +88,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626454e-04 Avg ME (F77/GPU) = 6.6262667672387088E-004 Relative difference = 2.825534762507892e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.974482e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.975454e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.975454e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.933487e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.934407e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.934407e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.311393 sec +TOTAL : 8.487231 sec INFO: No Floating Point Exceptions have been reported - 24,932,671,724 cycles # 2.999 GHz - 79,109,238,941 instructions # 3.17 insn per cycle - 8.315386942 seconds time elapsed + 24,900,546,223 cycles # 2.933 GHz + 79,107,234,987 instructions # 3.18 insn per cycle + 8.490956598 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3572) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -122,31 +122,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274863312764526E-004 Relative difference = 4.998523613136231e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.101882e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.114594e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.114594e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.983704e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.996217e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.996217e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.314306 sec +TOTAL : 2.353085 sec INFO: No Floating Point Exceptions have been reported - 6,536,135,374 cycles # 2.820 GHz - 20,272,209,719 instructions # 3.10 insn per cycle - 2.318509943 seconds time elapsed + 6,541,995,614 cycles # 2.777 GHz + 20,269,407,860 instructions # 3.10 insn per cycle + 2.356873297 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13779) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -154,31 +154,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861442972011E-004 Relative difference = 2.1772539563413118e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.623539e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.630403e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.630403e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.585450e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.591953e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.591953e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.015014 sec +TOTAL : 1.039372 sec INFO: No Floating Point Exceptions have been reported - 2,855,704,305 cycles # 2.804 GHz - 7,065,922,001 instructions # 2.47 insn per cycle - 1.019088436 seconds time elapsed + 2,850,375,088 cycles # 2.735 GHz + 7,065,899,998 instructions # 2.48 insn per cycle + 1.043028953 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12055) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -186,31 +186,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174396888E-004 Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.834257e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.842711e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.842711e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.792341e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.800787e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.800787e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.899190 sec +TOTAL : 0.920003 sec INFO: No Floating Point Exceptions have been reported - 2,520,844,071 cycles # 2.793 GHz - 6,403,739,146 instructions # 2.54 insn per cycle - 0.903188458 seconds time elapsed + 2,517,551,147 cycles # 2.728 GHz + 6,403,207,803 instructions # 2.54 insn per cycle + 0.923687532 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11021) (512y: 43) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -218,31 +218,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271938174396888E-004 Relative difference = 2.7547150614455683e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 1 --rmbhst OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.454972e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.460357e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.460357e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.402198e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.407285e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.407285e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.132255 sec +TOTAL : 1.174725 sec INFO: No Floating Point Exceptions have been reported - 2,069,344,265 cycles # 1.822 GHz - 3,303,922,317 instructions # 1.60 insn per cycle - 1.136365608 seconds time elapsed + 2,068,104,176 cycles # 1.756 GHz + 3,303,725,822 instructions # 1.60 insn per cycle + 1.178407380 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2601) (512y: 46) (512z: 9605) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -250,8 +250,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952779718007E-004 Relative difference = 4.194411063934945e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt index fc75544cb4..0807d31ee5 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-01_23:20:19 +DATE: 2024-09-15_11:17:26 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.514511e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.549897e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.553526e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.482391e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.527725e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.532031e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059596e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.491830 sec +TOTAL : 0.493760 sec INFO: No Floating Point Exceptions have been reported - 2,056,287,025 cycles # 2.911 GHz - 3,100,773,025 instructions # 1.51 insn per cycle - 0.767274646 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 + 2,047,228,800 cycles # 2.877 GHz + 
3,039,242,832 instructions # 1.48 insn per cycle + 0.768472979 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.110886e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.165093e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.167444e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.102852e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.164567e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.167207e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.793509 sec +TOTAL : 1.802499 sec INFO: No Floating Point Exceptions have been reported - 6,014,561,793 cycles # 2.958 GHz - 11,881,033,580 instructions # 1.98 insn per cycle - 2.090194643 seconds time elapsed + 5,908,213,594 cycles # 2.909 GHz + 12,456,477,911 instructions # 2.11 insn per cycle + 2.093284072 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626454e-04 Avg ME (F77/GPU) = 6.6262667672387088E-004 Relative difference = 2.825534762507892e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.968566e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.969505e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.969505e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.936218e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.937180e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.937180e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060121e+00 +- 2.367902e+00 ) GeV^-4 -TOTAL : 8.335960 sec +TOTAL : 8.475402 sec INFO: No Floating Point Exceptions have been reported - 24,939,697,284 cycles # 2.991 GHz - 78,843,077,860 instructions # 3.16 insn per cycle - 8.339952201 seconds time elapsed + 24,949,332,764 cycles # 2.943 GHz + 78,839,555,653 instructions # 3.16 insn per cycle + 8.479529977 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3092) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627487e-04 Avg ME (F77/C++) = 6.6274866250177339E-004 Relative difference = 5.65798569465384e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.281829e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.295515e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.295515e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.122699e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.135567e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.135567e+03 ) sec^-1 MeanMatrixElemValue = ( 4.060119e+00 +- 2.367901e+00 ) GeV^-4 -TOTAL : 2.256952 sec +TOTAL : 2.306947 sec INFO: No Floating Point Exceptions have been reported - 6,466,466,189 cycles # 2.861 GHz - 20,229,917,334 instructions # 3.13 insn per cycle - 2.261129242 seconds time elapsed + 6,466,639,499 cycles # 2.800 GHz + 20,230,851,658 instructions # 3.13 insn per cycle + 2.310638118 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:13491) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
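For the throughput lines, the "-p 64 256 1" arguments select 64 blocks of 256 threads for 1 iteration, i.e. 64*256*1 = 16384 events per run, and the EvtsPerSec figures divide the event count by the time spent in the corresponding section of the run. A back-of-envelope cross-check against the sse4_f_inl0_hrd1 run just above (a sketch under these assumptions, not the harness's own timer code):

#include <cstdio>

int main()
{
  const int blocks = 64, threads = 256, iterations = 1; // "-p 64 256 1"
  const long nevt = 1L * blocks * threads * iterations; // 16384 events
  const double totalSec = 2.306947;                     // "TOTAL" of the sse4 run above
  // The logged EvtsPerSec figures use finer-grained internal timers (random
  // numbers + momenta + MEs, MEs only, ...), so dividing by the whole-run
  // TOTAL only gives a lower bound on the per-section rates: ~7.10e3 here,
  // against the logged 7.14e3 for MatrixElems.
  std::printf( "nevt = %ld, nevt/TOTAL = %.3e events/s\n", nevt, nevt / totalSec );
  return 0;
}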
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274861448331612E-004 Relative difference = 2.1853408865157068e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.549288e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.555397e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.555397e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.507818e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.513887e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.513887e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 1.063285 sec +TOTAL : 1.092466 sec INFO: No Floating Point Exceptions have been reported - 2,979,164,688 cycles # 2.793 GHz - 7,206,814,892 instructions # 2.42 insn per cycle - 1.067401576 seconds time elapsed + 2,980,915,950 cycles # 2.722 GHz + 7,206,628,057 instructions # 2.42 insn per cycle + 1.096222389 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:12437) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271939668088170E-004 Relative difference = 5.008331292535666e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.759679e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.767582e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.767582e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.724603e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.732183e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.732183e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060560e+00 +- 2.367611e+00 ) GeV^-4 -TOTAL : 0.936636 sec +TOTAL : 0.955874 sec INFO: No Floating Point Exceptions have been reported - 2,613,305,137 cycles # 2.780 GHz - 6,544,993,852 instructions # 2.50 insn per cycle - 0.940671045 seconds time elapsed + 2,613,667,112 cycles # 2.726 GHz + 6,544,516,026 instructions # 2.50 insn per cycle + 0.959652526 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11449) (512y: 26) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627194e-04 Avg ME (F77/C++) = 6.6271939668088170E-004 Relative difference = 5.008331292535666e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.401401e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.406341e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.406341e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.352025e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.356715e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.356715e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060562e+00 +- 2.367612e+00 ) GeV^-4 -TOTAL : 1.175223 sec +TOTAL : 1.218129 sec INFO: No Floating Point Exceptions have been reported - 2,142,034,864 cycles # 1.818 GHz - 3,462,437,313 instructions # 1.62 insn per cycle - 1.179316947 seconds time elapsed + 2,137,040,914 cycles # 1.750 GHz + 3,460,849,319 instructions # 1.62 insn per cycle + 1.221974093 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3037) (512y: 25) (512z: 9677) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
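The perf-counter annotations are simple ratios of the two counters printed alongside them; for the 512z_f_inl0_hrd1 run just above, 3,460,849,319 instructions over 2,137,040,914 cycles reproduces the quoted 1.62 insn per cycle:

#include <cstdio>

int main()
{
  const double cycles = 2137040914.;      // 512z run above
  const double instructions = 3460849319.;
  std::printf( "%.2f insn per cycle\n", instructions / cycles ); // prints 1.62
  return 0;
}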
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627195e-04 Avg ME (F77/C++) = 6.6271952032316561E-004 Relative difference = 3.066631594207157e-08 diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt index bd734c7984..507a64eed8 100644 --- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg' -DATE: 2024-09-01_23:37:31 +DATE: 2024-09-15_11:39:16 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.598056e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.633275e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.637037e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.567838e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.605874e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.609566e+05 ) sec^-1 MeanMatrixElemValue = ( 4.059597e+00 +- 2.368053e+00 ) GeV^-4 -TOTAL : 0.493073 sec +TOTAL : 0.496412 sec INFO: No Floating Point Exceptions have been reported - 2,074,178,646 cycles # 2.923 GHz - 3,133,020,237 instructions # 1.51 insn per cycle - 0.769870120 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1 + 2,051,587,227 cycles # 2.873 GHz + 
3,025,794,403 instructions # 1.47 insn per cycle + 0.774558823 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.631649e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.697107e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.700087e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.651284e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.721094e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.724249e+05 ) sec^-1 MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4 -TOTAL : 1.730123 sec +TOTAL : 1.731680 sec INFO: No Floating Point Exceptions have been reported - 5,836,756,305 cycles # 2.975 GHz - 12,510,459,982 instructions # 2.14 insn per cycle - 2.018656217 seconds time elapsed + 5,770,677,421 cycles # 2.916 GHz + 12,010,283,008 instructions # 2.08 insn per cycle + 2.035197700 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626454e-04 Avg ME (F77/GPU) = 6.6262669162351490E-004 Relative difference = 2.8232862531213374e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 5.686722e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.687531e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.687531e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.459828e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.460600e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.460600e+02 ) sec^-1 MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4 -TOTAL : 28.844831 sec +TOTAL : 30.042280 sec INFO: No Floating Point Exceptions have been reported - 86,631,947,563 cycles # 3.003 GHz - 135,663,970,094 instructions # 1.57 insn per cycle - 28.848826071 seconds time elapsed + 86,122,252,676 cycles # 2.867 GHz + 135,657,307,138 instructions # 1.58 insn per cycle + 30.046456599 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:15856) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627535e-04 Avg ME (F77/C++) = 6.6275349717465765E-004 Relative difference = 4.26303654465793e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.982423e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.994855e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.994855e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.672428e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.686393e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.686393e+03 ) sec^-1 MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4 -TOTAL : 2.353688 sec +TOTAL : 2.462988 sec INFO: No Floating Point Exceptions have been reported - 6,754,455,687 cycles # 2.866 GHz - 19,356,639,694 instructions # 2.87 insn per cycle - 2.357792657 seconds time elapsed + 6,758,193,786 cycles # 2.742 GHz + 19,357,772,182 instructions # 2.86 insn per cycle + 2.467248153 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:69591) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627486e-04 Avg ME (F77/C++) = 6.6274862748188362E-004 Relative difference = 4.14665283800746e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.465272e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.470907e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.470907e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.362305e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.367046e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.367046e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.124213 sec +TOTAL : 1.208685 sec INFO: No Floating Point Exceptions have been reported - 3,168,613,598 cycles # 2.810 GHz - 6,792,791,682 instructions # 2.14 insn per cycle - 1.128353409 seconds time elapsed + 3,166,621,827 cycles # 2.612 GHz + 6,792,444,940 instructions # 2.15 insn per cycle + 1.212802697 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:49012) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627273e-04 Avg ME (F77/C++) = 6.6272731568543797E-004 Relative difference = 2.3668012430631962e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.761149e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.768816e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.768816e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.652877e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.659885e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.659885e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 0.936182 sec +TOTAL : 0.997146 sec INFO: No Floating Point Exceptions have been reported - 2,624,003,917 cycles # 2.793 GHz - 5,970,228,341 instructions # 2.28 insn per cycle - 0.940254225 seconds time elapsed + 2,625,468,482 cycles # 2.624 GHz + 5,970,509,824 instructions # 2.27 insn per cycle + 1.001249505 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:42601) (512y: 11) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
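The recurring "MEK (channelid array)" DEBUG lines distribute the 512 test events evenly over channels 2 to 17, i.e. 16 channels at 32 events each; the "123 channels" in the message appears to be the total number of single-diagram channels available for this gg_ttxgg process, of which only 16 are exercised here. A sketch reproducing the printed map (for illustration only):

#include <cstdio>

int main()
{
  const int nevt = 512, firstChan = 2, lastChan = 17;
  const int nChan = lastChan - firstChan + 1; // 16 channels used
  std::printf( "{ " );
  for( int c = firstChan; c <= lastChan; c++ )
    std::printf( "%d : %d%s", c, nevt / nChan, c < lastChan ? ", " : " }\n" );
  return 0; // prints { 2 : 32, 3 : 32, ..., 17 : 32 }
}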
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.627273e-04 Avg ME (F77/C++) = 6.6272731568543797E-004 Relative difference = 2.3668012430631962e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.450303e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.455467e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.455467e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.322992e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.327409e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.327409e+04 ) sec^-1 MeanMatrixElemValue = ( 4.060905e+00 +- 2.367377e+00 ) GeV^-4 -TOTAL : 1.136203 sec +TOTAL : 1.244528 sec INFO: No Floating Point Exceptions have been reported - 2,073,701,956 cycles # 1.820 GHz - 3,493,813,941 instructions # 1.68 insn per cycle - 1.140373665 seconds time elapsed + 2,076,691,772 cycles # 1.664 GHz + 3,494,505,327 instructions # 1.68 insn per cycle + 1.248709350 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 5207) (512y: 3) (512z:44836) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
@@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 6.627275e-04
Avg ME (F77/C++) = 6.6272750237027223E-004
Relative difference = 3.5765412974815996e-09
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt
index 84ffcded94..c027e74779 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_f_inl1_hrd1.txt
@@ -1,5 +1,5 @@
-Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
+Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
BACKEND=cpp512y (was cppauto)
OMPFLAGS=
FPTYPE='d'
@@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h
make: Nothing to be done for 'gtestlibs'.
make USEBUILDDIR=1 BACKEND=cuda
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
make USEBUILDDIR=1 BACKEND=cppnone
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
make USEBUILDDIR=1 BACKEND=cppsse4
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
make USEBUILDDIR=1 BACKEND=cppavx2
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
make USEBUILDDIR=1 BACKEND=cpp512z
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
-DATE: 2024-09-01_23:38:20
+DATE: 2024-09-15_11:40:08
On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1]
Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 5.587128e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.623135e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.626795e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 5.562972e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.598287e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.601913e+05 ) sec^-1
MeanMatrixElemValue = ( 4.059597e+00 +- 2.368053e+00 ) GeV^-4
-TOTAL : 0.490425 sec
+TOTAL : 0.492421 sec
INFO: No Floating Point Exceptions have been reported
- 2,067,306,761 cycles # 2.921 GHz
- 3,120,326,206 instructions # 1.51 insn per cycle
- 0.767854438 seconds time elapsed
-runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1
+ 2,010,223,902 cycles # 2.826 GHz
+ 3,031,193,233 instructions # 1.51 insn per cycle
+ 0.770287796 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 64 256 1
==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
.........................................................................
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe -p 2048 256 1 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=1] [hardcodePARAM=1]
Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 8.673672e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.739936e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.742910e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 8.689601e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.749985e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.752940e+05 ) sec^-1
MeanMatrixElemValue = ( 6.664703e+00 +- 5.072736e+00 ) GeV^-4
-TOTAL : 1.727194 sec
+TOTAL : 1.732807 sec
INFO: No Floating Point Exceptions have been reported
- 5,838,535,470 cycles # 2.981 GHz
- 11,756,766,439 instructions # 2.01 insn per cycle
- 2.016509360 seconds time elapsed
+ 5,640,346,322 cycles # 2.872 GHz
+ 11,210,275,869 instructions # 1.99 insn per cycle
+ 2.022037581 seconds time elapsed
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/runTest_cuda.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/runTest_cuda.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
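Note: the perf-stat style counter lines after each run ('cycles', 'instructions', 'seconds time elapsed') are related by simple arithmetic: 'insn per cycle' is instructions divided by cycles, while the GHz figure is cycles divided by the CPU time actually consumed (not by the elapsed wall-clock time, which for the CUDA runs also covers GPU activity and I/O). A tiny C++ check of the IPC figure, with the counter values copied from the CUDA run above:

    #include <cstdio>
    int main()
    {
      // Counters copied from the build.cuda_f_inl1_hrd1 run above
      const double cycles = 2010223902.;
      const double instructions = 3031193233.;
      std::printf( "insn per cycle = %.2f\n", instructions / cycles ); // 1.51, as logged
      return 0;
    }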
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
@@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_f_inl1_hrd1/fcheck_cuda.exe 2 64 2
Avg ME (C++/GPU) = 6.626454e-04
Avg ME (F77/GPU) = 6.6262669162351490E-004
Relative difference = 2.8232862531213374e-05
OK (relative difference <= 5E-3)
=========================================================================
-Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe
+Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_f_inl1_hrd1/check_hip.exe
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME] (23) = ( 5.753740e+02 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.754563e+02 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.754563e+02 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 5.446094e+02 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.446837e+02 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.446837e+02 ) sec^-1
MeanMatrixElemValue = ( 4.059969e+00 +- 2.367799e+00 ) GeV^-4
-TOTAL : 28.508335 sec
+TOTAL : 30.118401 sec
INFO: No Floating Point Exceptions have been reported
- 85,830,228,914 cycles # 3.011 GHz
- 135,366,257,259 instructions # 1.58 insn per cycle
- 28.512429190 seconds time elapsed
+ 86,113,084,692 cycles # 2.859 GHz
+ 135,363,065,912 instructions # 1.57 insn per cycle
+ 30.122446956 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4:15471) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
@@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_f_inl1_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 6.627535e-04
Avg ME (F77/C++) = 6.6275349662128086E-004
Relative difference = 5.098002770919431e-09
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 6.811935e+03 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.824056e+03 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.824056e+03 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 6.516652e+03 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.527742e+03 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.527742e+03 ) sec^-1
MeanMatrixElemValue = ( 4.059962e+00 +- 2.367792e+00 ) GeV^-4
-TOTAL : 2.412491 sec
+TOTAL : 2.521798 sec
INFO: No Floating Point Exceptions have been reported
- 6,846,250,673 cycles # 2.834 GHz
- 19,407,013,482 instructions # 2.83 insn per cycle
- 2.416838266 seconds time elapsed
+ 6,856,870,344 cycles # 2.715 GHz
+ 19,407,796,379 instructions # 2.83 insn per cycle
+ 2.529187527 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4:69622) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
@@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_f_inl1_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 6.627486e-04
Avg ME (F77/C++) = 6.6274862799683282E-004
Relative difference = 4.2243518621014775e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 1.495437e+04 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.501066e+04 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.501066e+04 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.378784e+04 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.383778e+04 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.383778e+04 ) sec^-1
MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4
-TOTAL : 1.101526 sec
+TOTAL : 1.194647 sec
INFO: No Floating Point Exceptions have been reported
- 3,103,372,858 cycles # 2.809 GHz
- 6,715,804,215 instructions # 2.16 insn per cycle
- 1.105678914 seconds time elapsed
+ 3,106,911,149 cycles # 2.593 GHz
+ 6,716,375,817 instructions # 2.16 insn per cycle
+ 1.199018593 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:47699) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
@@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_f_inl1_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 6.627273e-04
Avg ME (F77/C++) = 6.6272731623419345E-004
Relative difference = 2.449603850635964e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 1.771188e+04 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.779181e+04 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.779181e+04 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.633831e+04 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.642301e+04 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.642301e+04 ) sec^-1
MeanMatrixElemValue = ( 4.060903e+00 +- 2.367377e+00 ) GeV^-4
-TOTAL : 0.930819 sec
+TOTAL : 1.009236 sec
INFO: No Floating Point Exceptions have been reported
- 2,625,713,520 cycles # 2.810 GHz
- 5,968,739,740 instructions # 2.27 insn per cycle
- 0.934943696 seconds time elapsed
+ 2,628,290,758 cycles # 2.601 GHz
+ 5,969,462,739 instructions # 2.27 insn per cycle
+ 1.017917591 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:41882) (512y: 13) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
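Note: the 'Internal loops fptype_sv = VECTOR[N]' lines follow directly from the SIMD register width divided by the size of the floating-point type: in these FLOAT ('f') logs a 256-bit avx2/512y register holds 8 lanes and a 512-bit 512z register holds 16, while the MIXED ('m') logs further below use double-precision internal loops and therefore half as many lanes. A sketch of that arithmetic (the lanes() helper is illustrative, not part of the cudacpp code):

    #include <cstdio>
    // lanes = register width in bits / ( 8 * sizeof(fptype) )
    template<typename FPTYPE>
    constexpr int lanes( int registerBits ) { return registerBits / ( 8 * (int)sizeof( FPTYPE ) ); }
    int main()
    {
      std::printf( "float, 256-bit ('avx2'/'512y'): VECTOR[%d]\n", lanes<float>( 256 ) );   // 8
      std::printf( "float, 512-bit ('512z'): VECTOR[%d]\n", lanes<float>( 512 ) );          // 16
      std::printf( "double, 256-bit ('avx2'/'512y'): VECTOR[%d]\n", lanes<double>( 256 ) ); // 4
      std::printf( "double, 512-bit ('512z'): VECTOR[%d]\n", lanes<double>( 512 ) );        // 8
      return 0;
    }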
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
@@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_f_inl1_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 6.627273e-04
Avg ME (F77/C++) = 6.6272731623419345E-004
Relative difference = 2.449603850635964e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe -p 64 256 1 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=1] [hardcodePARAM=1]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 1.430515e+04 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.435702e+04 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.435702e+04 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.325974e+04 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.330533e+04 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.330533e+04 ) sec^-1
MeanMatrixElemValue = ( 4.060905e+00 +- 2.367377e+00 ) GeV^-4
-TOTAL : 1.151409 sec
+TOTAL : 1.242092 sec
INFO: No Floating Point Exceptions have been reported
- 2,066,103,190 cycles # 1.789 GHz
- 3,487,311,772 instructions # 1.69 insn per cycle
- 1.155609949 seconds time elapsed
+ 2,077,381,824 cycles # 1.674 GHz
+ 3,490,865,426 instructions # 1.68 insn per cycle
+ 1.248861709 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4171) (512y: 4) (512z:44487)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
@@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_f_inl1_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 6.627275e-04
Avg ME (F77/C++) = 6.6272750247886592E-004
Relative difference = 3.740400032174438e-09
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt
index 2f68bb4a80..f1d40dff2c 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd0.txt
@@ -1,5 +1,5 @@
-Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
+Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
BACKEND=cpp512y (was cppauto)
OMPFLAGS=
FPTYPE='d'
@@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h
make: Nothing to be done for 'gtestlibs'.
make USEBUILDDIR=1 BACKEND=cuda
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
make USEBUILDDIR=1 BACKEND=cppnone
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
make USEBUILDDIR=1 BACKEND=cppsse4
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
make USEBUILDDIR=1 BACKEND=cppavx2
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
make USEBUILDDIR=1 BACKEND=cpp512z
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
-DATE: 2024-09-01_23:20:46
+DATE: 2024-09-15_11:17:52
On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 3.331621e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.355030e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.356680e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 3.307071e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.337162e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.339154e+05 ) sec^-1
MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4
-TOTAL : 0.530458 sec
+TOTAL : 0.532508 sec
INFO: No Floating Point Exceptions have been reported
- 2,214,937,387 cycles # 2.872 GHz
- 3,503,262,390 instructions # 1.58 insn per cycle
- 0.829415308 seconds time elapsed
-runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1
+ 2,203,526,312 cycles # 2.869 GHz
+ 3,467,986,959 instructions # 1.57 insn per cycle
+ 0.824379177 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1
==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
.........................................................................
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 4.144030e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.171626e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.172804e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 4.133497e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.164330e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.165560e+05 ) sec^-1
MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4
-TOTAL : 3.032896 sec
+TOTAL : 3.045403 sec
INFO: No Floating Point Exceptions have been reported
- 9,787,989,221 cycles # 2.968 GHz
- 22,358,228,581 instructions # 2.28 insn per cycle
- 3.355717561 seconds time elapsed
+ 9,651,769,916 cycles # 2.918 GHz
+ 21,560,396,285 instructions # 2.23 insn per cycle
+ 3.363407224 seconds time elapsed
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/runTest_cuda.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/runTest_cuda.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
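Note: each check executable processes gpublocks x gputhreads x iterations events, so '-p 64 256 1' above corresponds to 64 x 256 x 1 = 16384 events and '-p 2048 256 1' to 524288 events; the EvtsPerSec figures divide those event counts by the time spent in the corresponding phases only, which is why they come out somewhat higher than events divided by the TOTAL time. A short C++ sketch of this bookkeeping (assuming this reading of the '-p' arguments; names are illustrative):

    #include <cstdio>
    int main()
    {
      // '-p 2048 256 1' from the CUDA throughput run above
      const long nblocks = 2048, nthreads = 256, niterations = 1;
      const long nevents = nblocks * nthreads * niterations; // 524288 events
      const double evtsPerSec = 4.165560e+05; // logged EvtsPerSec[MECalcOnly]
      std::printf( "events = %ld, implied ME time = %.3f sec\n", nevents, nevents / evtsPerSec );
      return 0;
    }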
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
@@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2
Avg ME (C++/GPU) = 6.626675e-04
Avg ME (F77/GPU) = 6.6266732376103494E-004
Relative difference = 2.659538381540814e-07
OK (relative difference <= 5E-3)
=========================================================================
-Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe
+Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd0/check_hip.exe
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME] (23) = ( 1.889361e+03 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.890286e+03 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.890286e+03 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.856718e+03 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.857596e+03 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.857596e+03 ) sec^-1
MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4
-TOTAL : 8.686369 sec
+TOTAL : 8.839571 sec
INFO: No Floating Point Exceptions have been reported
- 25,927,576,057 cycles # 2.984 GHz
- 79,425,809,752 instructions # 3.06 insn per cycle
- 8.690475608 seconds time elapsed
+ 25,916,613,183 cycles # 2.931 GHz
+ 79,423,792,934 instructions # 3.06 insn per cycle
+ 8.843857471 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 4775) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
@@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 6.626675e-04
Avg ME (F77/C++) = 6.6266731406016235E-004
Relative difference = 2.8059296349552523e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 3.578240e+03 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.581401e+03 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.581401e+03 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 3.495399e+03 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.498545e+03 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.498545e+03 ) sec^-1
MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4
-TOTAL : 4.589410 sec
+TOTAL : 4.699126 sec
INFO: No Floating Point Exceptions have been reported
- 12,833,581,935 cycles # 2.794 GHz
- 38,823,261,134 instructions # 3.03 insn per cycle
- 4.593493589 seconds time elapsed
+ 12,847,395,150 cycles # 2.733 GHz
+ 38,826,102,030 instructions # 3.02 insn per cycle
+ 4.703180057 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4:13173) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
@@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 6.626675e-04
Avg ME (F77/C++) = 6.6266730246908442E-004
Relative difference = 2.98084507782618e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 8.161910e+03 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.177971e+03 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.177971e+03 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 8.037182e+03 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.053225e+03 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.053225e+03 ) sec^-1
MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4
-TOTAL : 2.015681 sec
+TOTAL : 2.047520 sec
INFO: No Floating Point Exceptions have been reported
- 5,623,032,218 cycles # 2.785 GHz
- 13,615,351,145 instructions # 2.42 insn per cycle
- 2.019952518 seconds time elapsed
+ 5,598,661,180 cycles # 2.730 GHz
+ 13,618,631,873 instructions # 2.43 insn per cycle
+ 2.051512013 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11427) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
@@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 6.626675e-04
Avg ME (F77/C++) = 6.6266730409276857E-004
Relative difference = 2.956342832710188e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 9.464776e+03 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.487442e+03 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.487442e+03 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 9.221665e+03 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.243251e+03 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.243251e+03 ) sec^-1
MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4
-TOTAL : 1.739023 sec
+TOTAL : 1.785642 sec
INFO: No Floating Point Exceptions have been reported
- 4,862,882,649 cycles # 2.791 GHz
- 12,296,492,579 instructions # 2.53 insn per cycle
- 1.743291896 seconds time elapsed
+ 4,865,374,839 cycles # 2.720 GHz
+ 12,297,660,832 instructions # 2.53 insn per cycle
+ 1.789585857 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10331) (512y: 79) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
@@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 6.626675e-04
Avg ME (F77/C++) = 6.6266730409276857E-004
Relative difference = 2.956342832710188e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 1 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 7.177088e+03 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.189830e+03 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.189830e+03 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 6.872514e+03 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.884400e+03 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.884400e+03 ) sec^-1
MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4
-TOTAL : 2.291509 sec
+TOTAL : 2.393483 sec
INFO: No Floating Point Exceptions have been reported
- 4,173,466,182 cycles # 1.819 GHz
- 6,390,838,755 instructions # 1.53 insn per cycle
- 2.295964952 seconds time elapsed
+ 4,171,721,525 cycles # 1.741 GHz
+ 6,391,185,056 instructions # 1.53 insn per cycle
+ 2.397568985 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1980) (512y: 93) (512z: 9360)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
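Note: the 'MEK (channelid array)' debug lines record how the 512 test events are spread over the sampled channels: 16 channel ids (2 through 17) times 32 events each accounts for exactly the 512 events processed. A trivial consistency check of that map (illustrative):

    #include <cstdio>
    int main()
    {
      // { 2 : 32, 3 : 32, ..., 17 : 32 } from the MEK debug lines above
      int total = 0;
      for( int channelid = 2; channelid <= 17; channelid++ ) total += 32;
      std::printf( "channels 2..17 x 32 events = %d\n", total ); // 512
      return 0;
    }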
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 }
@@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 6.626675e-04
Avg ME (F77/C++) = 6.6266730409276857E-004
Relative difference = 2.956342832710188e-07
diff --git a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt
index bb6eacdb67..ae3635632d 100644
--- a/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggttgg_mad/log_ggttgg_mad_m_inl0_hrd1.txt
@@ -1,5 +1,5 @@
-Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
+Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg
BACKEND=cpp512y (was cppauto)
OMPFLAGS=
FPTYPE='d'
@@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h
make: Nothing to be done for 'gtestlibs'.
make USEBUILDDIR=1 BACKEND=cuda
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
make USEBUILDDIR=1 BACKEND=cppnone
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
make USEBUILDDIR=1 BACKEND=cppsse4
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
make USEBUILDDIR=1 BACKEND=cppavx2
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
make USEBUILDDIR=1 BACKEND=cpp512z
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg'
-DATE: 2024-09-01_23:21:19
+DATE: 2024-09-15_11:18:27
On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 3.322191e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.344102e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.345776e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 3.320961e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.350573e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.352533e+05 ) sec^-1
MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4
-TOTAL : 0.529512 sec
+TOTAL : 0.532217 sec
INFO: No Floating Point Exceptions have been reported
- 2,233,296,216 cycles # 2.915 GHz
- 3,529,609,506 instructions # 1.58 insn per cycle
- 0.825080098 seconds time elapsed
-runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1
+ 2,207,479,579 cycles # 2.873 GHz
+ 3,464,148,832 instructions # 1.57 insn per cycle
+ 0.824297603 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1
==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
.........................................................................
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 4.141564e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.169144e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.170301e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 4.148990e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.180422e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.181658e+05 ) sec^-1
MeanMatrixElemValue = ( 6.665112e+00 +- 5.002651e+00 ) GeV^-4
-TOTAL : 3.033131 sec
+TOTAL : 3.035739 sec
INFO: No Floating Point Exceptions have been reported
- 9,779,205,799 cycles # 2.967 GHz
- 21,659,055,827 instructions # 2.21 insn per cycle
- 3.355957763 seconds time elapsed
+ 9,612,803,881 cycles # 2.915 GHz
+ 20,074,302,744 instructions # 2.09 insn per cycle
+ 3.353532451 seconds time elapsed
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/runTest_cuda.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/runTest_cuda.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 6.626675e-04 Avg ME (F77/GPU) = 6.6266732376103494E-004 Relative difference = 2.659538381540814e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.887290e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.888180e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.888180e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.831511e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.832364e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.832364e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 8.695848 sec +TOTAL : 8.961804 sec INFO: No Floating Point Exceptions have been reported - 25,991,391,295 cycles # 2.988 GHz - 79,451,765,257 instructions # 3.06 insn per cycle - 8.700070515 seconds time elapsed + 26,010,493,082 cycles # 2.902 GHz + 79,449,384,960 instructions # 3.05 insn per cycle + 8.965752302 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 4431) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266731406016235E-004 Relative difference = 2.8059296349552523e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.584459e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.587768e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.587768e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.477024e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.480127e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.480127e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 4.581005 sec +TOTAL : 4.723753 sec INFO: No Floating Point Exceptions have been reported - 12,823,284,183 cycles # 2.797 GHz - 38,780,792,036 instructions # 3.02 insn per cycle - 4.585213943 seconds time elapsed + 12,826,084,303 cycles # 2.714 GHz + 38,778,289,694 instructions # 3.02 insn per cycle + 4.727826379 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:12935) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730246908442E-004 Relative difference = 2.98084507782618e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.220702e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.236918e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.236918e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.051767e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.067821e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.067821e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.001136 sec +TOTAL : 2.043694 sec INFO: No Floating Point Exceptions have been reported - 5,586,247,922 cycles # 2.787 GHz - 13,732,848,224 instructions # 2.46 insn per cycle - 2.005304858 seconds time elapsed + 5,591,778,218 cycles # 2.733 GHz + 13,733,552,430 instructions # 2.46 insn per cycle + 2.047665232 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:11510) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730409276857E-004 Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 9.263881e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.285362e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.285362e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.123898e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.144357e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.144357e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 1.776462 sec +TOTAL : 1.804403 sec INFO: No Floating Point Exceptions have been reported - 4,951,429,315 cycles # 2.782 GHz - 12,423,212,120 instructions # 2.51 insn per cycle - 1.780606206 seconds time elapsed + 4,951,573,094 cycles # 2.739 GHz + 12,422,632,916 instructions # 2.51 insn per cycle + 1.808331695 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:10322) (512y: 239) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730409276857E-004 Relative difference = 2.956342832710188e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.149223e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.161842e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.161842e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.884699e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.896731e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.896731e+03 ) sec^-1 MeanMatrixElemValue = ( 4.063123e+00 +- 2.368970e+00 ) GeV^-4 -TOTAL : 2.300056 sec +TOTAL : 2.389141 sec INFO: No Floating Point Exceptions have been reported - 4,175,293,644 cycles # 1.813 GHz - 6,494,363,004 instructions # 1.56 insn per cycle - 2.304264144 seconds time elapsed + 4,181,828,175 cycles # 1.750 GHz + 6,496,177,989 instructions # 1.55 insn per cycle + 2.393377398 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1803) (512y: 191) (512z: 9369) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 123 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 123 channels { 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32, 17 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttgg.mad/SubProcesses/P1_gg_ttxgg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 6.626675e-04 Avg ME (F77/C++) = 6.6266730409276857E-004 Relative difference = 2.956342832710188e-07 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt index 98ba462e3e..407fbbe6c0 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -21,44 +21,44 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. 
-DATE: 2024-09-01_23:23:11 +DATE: 2024-09-15_11:20:21 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.059306e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.059673e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.059842e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.059284e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.059685e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.059813e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.431556 sec +TOTAL : 2.451621 sec INFO: No Floating Point Exceptions have been reported - 8,193,040,145 cycles # 2.976 GHz - 18,582,642,986 instructions # 2.27 insn per cycle - 2.812032026 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 + 8,080,887,114 cycles # 2.907 GHz + 16,734,437,330 instructions # 2.07 insn per cycle + 2.836211679 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.218449e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.220375e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.220612e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.254596e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.256737e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.256950e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 4.004890 sec +TOTAL : 4.020216 sec INFO: No Floating Point Exceptions have been reported - 12,865,885,820 cycles # 2.967 GHz - 28,902,839,656 instructions # 2.25 insn per cycle - 4.392213239 seconds time elapsed + 12,719,492,672 cycles # 2.923 GHz + 29,448,097,640 instructions # 2.32 insn per cycle + 4.407436029 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -66,33 +66,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 9.872263e-03 Avg ME (F77/GPU) = 9.8722595284406640E-003 Relative difference = 3.5164777671934515e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 7.740296e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.740516e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.740516e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.610103e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.610301e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.610301e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.825078 sec +TOTAL : 6.935255 sec INFO: No Floating Point Exceptions have been reported - 19,056,883,522 cycles # 2.791 GHz - 53,906,242,080 instructions # 2.83 insn per cycle - 6.829199798 seconds time elapsed + 18,974,774,871 cycles # 2.735 GHz + 53,899,721,094 instructions # 2.84 insn per cycle + 6.939338261 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:32424) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -100,31 +100,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.611722e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.611814e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.611814e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.579226e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.579318e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.579318e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.278065 sec +TOTAL : 3.345304 sec INFO: No Floating Point Exceptions have been reported - 9,778,535,399 cycles # 2.980 GHz - 27,150,865,076 instructions # 2.78 insn per cycle - 3.282250786 seconds time elapsed + 9,800,813,517 cycles # 2.927 GHz + 27,149,189,789 instructions # 2.77 insn per cycle + 3.349514409 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:96492) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -132,31 +132,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.476638e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.477048e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.477048e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.366336e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.366803e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.366803e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.520727 sec +TOTAL : 1.570292 sec INFO: No Floating Point Exceptions have been reported - 4,244,700,698 cycles # 2.785 GHz - 9,590,825,029 instructions # 2.26 insn per cycle - 1.524880781 seconds time elapsed + 4,287,053,926 cycles # 2.724 GHz + 9,590,127,631 instructions # 2.24 insn per cycle + 1.574599019 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84961) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -164,31 +164,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.994080e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.994623e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.994623e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.904765e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.905290e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.905290e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.323946 sec +TOTAL : 1.353979 sec INFO: No Floating Point Exceptions have been reported - 3,716,328,988 cycles # 2.800 GHz - 8,515,493,481 instructions # 2.29 insn per cycle - 1.328049109 seconds time elapsed + 3,709,436,689 cycles # 2.733 GHz + 8,514,247,183 instructions # 2.30 insn per cycle + 1.357880276 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80609) (512y: 89) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -196,31 +196,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.542686e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.543288e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.543288e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.407683e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.408196e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.408196e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.492969 sec +TOTAL : 1.552623 sec INFO: No Floating Point Exceptions have been reported - 2,695,876,323 cycles # 1.802 GHz - 4,282,247,178 instructions # 1.59 insn per cycle - 1.497260293 seconds time elapsed + 2,699,560,921 cycles # 1.736 GHz + 4,280,862,154 instructions # 1.59 insn per cycle + 1.556608026 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2853) (512y: 103) (512z:79114) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -228,8 +228,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285411531E-003 Relative difference = 3.516375977906115e-07 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt index 717f8886d6..e032151033 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd0_bridge.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -21,11 +21,11 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. -DATE: 2024-09-01_23:45:42 +DATE: 2024-09-15_11:49:15 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -34,17 +34,17 @@ WARNING! 
Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.053935e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.056881e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.056881e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.054597e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.057500e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.057500e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 2.379946 sec +TOTAL : 2.436820 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 8,083,288,425 cycles # 2.992 GHz - 17,108,705,420 instructions # 2.12 insn per cycle - 2.760730215 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge + 8,044,156,582 cycles # 2.913 GHz + 18,167,469,518 instructions # 2.26 insn per cycle + 2.819990438 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256) @@ -52,7 +52,7 @@ WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -61,18 +61,18 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.231350e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.265023e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.265023e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 9.188185e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.221546e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.221546e+03 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.986993 sec +TOTAL : 4.024442 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 12,934,982,025 cycles # 2.992 GHz - 29,294,622,576 instructions # 2.26 insn per cycle - 4.378595360 seconds time elapsed + 12,704,137,155 cycles # 2.918 GHz + 27,467,799,669 instructions # 2.16 insn per cycle + 4.411963692 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -80,35 +80,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 9.872263e-03 Avg ME (F77/GPU) = 9.8722595284406640E-003 Relative difference = 3.5164777671934515e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe -p 
1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 7.734772e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.735047e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.735047e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.352982e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.353176e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.353176e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.825716 sec +TOTAL : 7.182913 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 19,050,334,983 cycles # 2.790 GHz - 53,904,991,030 instructions # 2.83 insn per cycle - 6.829977475 seconds time elapsed + 19,533,134,643 cycles # 2.719 GHz + 53,904,822,620 instructions # 2.76 insn per cycle + 7.186820393 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:32424) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -116,33 +116,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.615071e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.615160e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.615160e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.583220e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.583307e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.583307e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 3.270482 sec +TOTAL : 3.337581 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 9,810,845,524 cycles # 2.997 GHz - 27,153,154,462 instructions # 2.77 insn per cycle - 3.274772696 seconds time elapsed + 9,779,129,351 cycles # 2.927 GHz + 27,151,664,900 instructions # 2.78 insn per cycle + 3.341583664 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:96492) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -150,33 +150,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595285514851E-003 Relative difference = 3.5163655122073967e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.459833e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.460241e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.460241e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.365450e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.365854e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.365854e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 1.527944 sec +TOTAL : 1.570834 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 4,289,516,676 cycles # 2.801 GHz - 9,592,867,163 instructions # 2.24 insn per cycle - 1.532272856 seconds time elapsed + 4,275,074,655 cycles # 2.716 GHz + 9,592,294,661 instructions # 2.24 insn per cycle + 1.574792391 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84961) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
@@ -184,33 +184,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 9.872263e-03
Avg ME (F77/C++) = 9.8722595285411531E-003
Relative difference = 3.516375977906115e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
WARNING! Instantiate host Bridge (nevt=256)
Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 3.939776e+02 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.940326e+02 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.940326e+02 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 3.876513e+02 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.877048e+02 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.877048e+02 ) sec^-1
MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6
-TOTAL : 1.342790 sec
+TOTAL : 1.364090 sec
INFO: No Floating Point Exceptions have been reported
INFO: No Floating Point Exceptions have been reported
- 3,731,950,548 cycles # 2.772 GHz
- 8,517,158,155 instructions # 2.28 insn per cycle
- 1.347148638 seconds time elapsed
+ 3,720,013,902 cycles # 2.721 GHz
+ 8,517,094,572 instructions # 2.29 insn per cycle
+ 1.368386654 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80609) (512y: 89) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
@@ -218,33 +218,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 9.872263e-03
Avg ME (F77/C++) = 9.8722595285411531E-003
Relative difference = 3.516375977906115e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
WARNING! Instantiate host Bridge (nevt=256)
Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 3.477757e+02 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.478322e+02 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.478322e+02 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 3.420108e+02 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.420617e+02 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.420617e+02 ) sec^-1
MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6
-TOTAL : 1.519990 sec
+TOTAL : 1.546895 sec
INFO: No Floating Point Exceptions have been reported
INFO: No Floating Point Exceptions have been reported
- 2,699,759,683 cycles # 1.772 GHz
- 4,284,442,288 instructions # 1.59 insn per cycle
- 1.524543784 seconds time elapsed
+ 2,698,104,238 cycles # 1.741 GHz
+ 4,283,566,876 instructions # 1.59 insn per cycle
+ 1.551097954 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2853) (512y: 103) (512z:79114)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
@@ -252,8 +252,8 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 9.872263e-03
Avg ME (F77/C++) = 9.8722595285411531E-003
Relative difference = 3.516375977906115e-07
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt
index 1483c9bce0..71b1803a4d 100644
--- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_d_inl0_hrd1.txt
@@ -1,5 +1,5 @@
-Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
+Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
BACKEND=cpp512y (was cppauto)
OMPFLAGS=
FPTYPE='d'
@@ -21,44 +21,44 @@ make: Nothing to be done for 'all'.
make: Nothing to be done for 'all'.
-DATE: 2024-09-01_23:24:38
+DATE: 2024-09-15_11:21:49
On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 4.058852e+02 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.059241e+02 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.059373e+02 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 4.055075e+02 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.055529e+02 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.055650e+02 ) sec^-1
MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6
-TOTAL : 2.431399 sec
+TOTAL : 2.454190 sec
INFO: No Floating Point Exceptions have been reported
- 8,193,686,544 cycles # 2.973 GHz
- 18,087,344,365 instructions # 2.21 insn per cycle
- 2.812916099 seconds time elapsed
-runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1
+ 8,101,270,896 cycles # 2.912 GHz
+ 18,320,414,768 instructions # 2.26 insn per cycle
+ 2.837550341 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1
==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
.........................................................................
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 9.226524e+03 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.228420e+03 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.228644e+03 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 9.224205e+03 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.226444e+03 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.226692e+03 ) sec^-1
MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6
-TOTAL : 4.008011 sec
+TOTAL : 4.029157 sec
INFO: No Floating Point Exceptions have been reported
- 12,900,895,160 cycles # 2.974 GHz
- 30,531,158,775 instructions # 2.37 insn per cycle
- 4.392074978 seconds time elapsed
+ 12,695,828,795 cycles # 2.910 GHz
+ 28,709,503,011 instructions # 2.26 insn per cycle
+ 4.420420636 seconds time elapsed
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/runTest_cuda.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/runTest_cuda.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
@@ -66,33 +66,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2
Avg ME (C++/GPU) = 9.872263e-03
Avg ME (F77/GPU) = 9.8722595284406640E-003
Relative difference = 3.5164777671934515e-07
OK (relative difference <= 5E-3)
=========================================================================
-Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/check_hip.exe
+Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_d_inl0_hrd1/check_hip.exe
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME] (23) = ( 7.805727e+01 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.805945e+01 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.805945e+01 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 8.201824e+01 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.202080e+01 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.202080e+01 ) sec^-1
MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6
-TOTAL : 6.764030 sec
+TOTAL : 6.438167 sec
INFO: No Floating Point Exceptions have been reported
- 18,779,280,912 cycles # 2.775 GHz
- 53,933,449,541 instructions # 2.87 insn per cycle
- 6.768170476 seconds time elapsed
+ 18,843,132,149 cycles # 2.926 GHz
+ 53,928,570,497 instructions # 2.86 insn per cycle
+ 6.442267111 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4:32022) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
@@ -100,31 +100,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 9.872263e-03
Avg ME (F77/C++) = 9.8722595285514851E-003
Relative difference = 3.5163655122073967e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 1.591597e+02 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.591682e+02 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.591682e+02 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.562611e+02 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.562704e+02 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.562704e+02 ) sec^-1
MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6
-TOTAL : 3.318241 sec
+TOTAL : 3.380180 sec
INFO: No Floating Point Exceptions have been reported
- 9,916,194,875 cycles # 2.985 GHz
- 27,130,313,114 instructions # 2.74 insn per cycle
- 3.322321674 seconds time elapsed
+ 9,918,861,148 cycles # 2.932 GHz
+ 27,128,280,341 instructions # 2.74 insn per cycle
+ 3.383996000 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4:96368) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
@@ -132,31 +132,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 9.872263e-03
Avg ME (F77/C++) = 9.8722595285514851E-003
Relative difference = 3.5163655122073967e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 3.453253e+02 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.453647e+02 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.453647e+02 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 3.368711e+02 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.369114e+02 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.369114e+02 ) sec^-1
MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6
-TOTAL : 1.529460 sec
+TOTAL : 1.568940 sec
INFO: No Floating Point Exceptions have been reported
- 4,280,208,349 cycles # 2.792 GHz
- 9,585,335,411 instructions # 2.24 insn per cycle
- 1.533657128 seconds time elapsed
+ 4,289,720,535 cycles # 2.728 GHz
+ 9,584,928,513 instructions # 2.23 insn per cycle
+ 1.573132113 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84968) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
@@ -164,31 +164,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 9.872263e-03
Avg ME (F77/C++) = 9.8722595285411531E-003
Relative difference = 3.516375977906115e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 3.973507e+02 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.974053e+02 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.974053e+02 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 3.874256e+02 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.874798e+02 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.874798e+02 ) sec^-1
MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6
-TOTAL : 1.330487 sec
+TOTAL : 1.364932 sec
INFO: No Floating Point Exceptions have been reported
- 3,719,620,826 cycles # 2.788 GHz
- 8,507,837,533 instructions # 2.29 insn per cycle
- 1.334658974 seconds time elapsed
+ 3,728,944,037 cycles # 2.726 GHz
+ 8,507,330,131 instructions # 2.28 insn per cycle
+ 1.368786926 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80632) (512y: 239) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
@@ -196,31 +196,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 9.872263e-03
Avg ME (F77/C++) = 9.8722595285411531E-003
Relative difference = 3.516375977906115e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 3.357138e+02 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.357653e+02 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.357653e+02 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 3.414224e+02 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.414743e+02 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.414743e+02 ) sec^-1
MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6
-TOTAL : 1.572289 sec
+TOTAL : 1.549013 sec
INFO: No Floating Point Exceptions have been reported
- 2,699,107,966 cycles # 1.713 GHz
- 4,281,647,123 instructions # 1.59 insn per cycle
- 1.576538172 seconds time elapsed
+ 2,698,122,905 cycles # 1.738 GHz
+ 4,280,648,246 instructions # 1.59 insn per cycle
+ 1.553090413 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2690) (512y: 185) (512z:79098)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
@@ -228,8 +228,8 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 9.872263e-03
Avg ME (F77/C++) = 9.8722595285411531E-003
Relative difference = 3.516375977906115e-07
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt
index 09bdb5cca5..26694465db 100644
--- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0.txt
@@ -1,5 +1,5 @@
-Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
+Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
BACKEND=cpp512y (was cppauto)
OMPFLAGS=
FPTYPE='d'
@@ -21,44 +21,44 @@ make: Nothing to be done for 'all'.
make: Nothing to be done for 'all'.
-DATE: 2024-09-01_23:26:04
+DATE: 2024-09-15_11:23:17
On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 6.208371e+02 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.209254e+02 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.209558e+02 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 6.208704e+02 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.209632e+02 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.209859e+02 ) sec^-1
MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6
-TOTAL : 1.739489 sec
+TOTAL : 1.761935 sec
INFO: No Floating Point Exceptions have been reported
- 5,891,365,145 cycles # 2.944 GHz
- 12,121,835,329 instructions # 2.06 insn per cycle
- 2.057320302 seconds time elapsed
-runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1
+ 5,908,004,381 cycles # 2.901 GHz
+ 11,686,361,328 instructions # 1.98 insn per cycle
+ 2.093948305 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1
==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
.........................................................................
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 2.128447e+04 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.128992e+04 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.129117e+04 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.102338e+04 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.102897e+04 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.103014e+04 ) sec^-1
MeanMatrixElemValue = ( 1.856829e-04 +- 8.333437e-05 ) GeV^-6
-TOTAL : 2.049663 sec
+TOTAL : 2.075560 sec
INFO: No Floating Point Exceptions have been reported
- 6,859,898,495 cycles # 2.967 GHz
- 15,045,871,459 instructions # 2.19 insn per cycle
- 2.368699830 seconds time elapsed
+ 6,795,219,354 cycles # 2.902 GHz
+ 14,967,758,240 instructions # 2.20 insn per cycle
+ 2.398428041 seconds time elapsed
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
@@ -66,33 +66,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2
Avg ME (C++/GPU) = 9.849635e-03
Avg ME (F77/GPU) = 9.8712451931260159E-003
Relative difference = 0.0021940095370046923
OK (relative difference <= 5E-3)
=========================================================================
-Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe
+Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME] (23) = ( 8.700551e+01 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.700826e+01 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.700826e+01 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 8.563117e+01 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.563375e+01 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.563375e+01 ) sec^-1
MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6
-TOTAL : 6.069647 sec
+TOTAL : 6.168224 sec
INFO: No Floating Point Exceptions have been reported
- 18,130,580,813 cycles # 2.986 GHz
- 53,910,417,290 instructions # 2.97 insn per cycle
- 6.073649498 seconds time elapsed
+ 18,106,019,929 cycles # 2.934 GHz
+ 53,907,716,361 instructions # 2.98 insn per cycle
+ 6.172403776 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4:20141) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
@@ -100,31 +100,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 9.847961e-03
Avg ME (F77/C++) = 9.8479612087551509E-003
Relative difference = 2.119780432912131e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 3.447845e+02 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.448292e+02 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.448292e+02 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 3.366569e+02 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.366962e+02 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.366962e+02 ) sec^-1
MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6
-TOTAL : 1.533158 sec
+TOTAL : 1.570259 sec
INFO: No Floating Point Exceptions have been reported
- 4,594,810,836 cycles # 2.990 GHz
- 13,807,588,425 instructions # 3.01 insn per cycle
- 1.537438030 seconds time elapsed
+ 4,597,646,888 cycles # 2.923 GHz
+ 13,807,163,752 instructions # 3.00 insn per cycle
+ 1.574045592 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4:97016) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
@@ -132,31 +132,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 9.847955e-03
Avg ME (F77/C++) = 9.8479546896367235E-003
Relative difference = 3.1515505172940424e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 6.958378e+02 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.960444e+02 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.960444e+02 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 6.801272e+02 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.802916e+02 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.802916e+02 ) sec^-1
MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6
-TOTAL : 0.760583 sec
+TOTAL : 0.778379 sec
INFO: No Floating Point Exceptions have been reported
- 2,125,857,620 cycles # 2.782 GHz
- 4,836,877,931 instructions # 2.28 insn per cycle
- 0.764668733 seconds time elapsed
+ 2,130,043,758 cycles # 2.726 GHz
+ 4,836,599,174 instructions # 2.27 insn per cycle
+ 0.782206721 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85494) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
@@ -164,31 +164,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 9.892973e-03
Avg ME (F77/C++) = 9.8929728161091246E-003
Relative difference = 1.8588029579156084e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 7.833177e+02 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.835234e+02 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.835234e+02 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 7.682520e+02 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.684604e+02 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.684604e+02 ) sec^-1
MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6
-TOTAL : 0.675768 sec
+TOTAL : 0.688940 sec
INFO: No Floating Point Exceptions have been reported
- 1,884,867,686 cycles # 2.775 GHz
- 4,291,429,103 instructions # 2.28 insn per cycle
- 0.679860499 seconds time elapsed
+ 1,884,507,725 cycles # 2.723 GHz
+ 4,290,819,235 instructions # 2.28 insn per cycle
+ 0.692749981 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81185) (512y: 44) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
@@ -196,31 +196,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 9.892973e-03
Avg ME (F77/C++) = 9.8929728161091246E-003
Relative difference = 1.8588029579156084e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 6.899775e+02 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.902123e+02 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.902123e+02 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 6.875530e+02 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.877565e+02 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.877565e+02 ) sec^-1
MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6
-TOTAL : 0.767901 sec
+TOTAL : 0.770700 sec
INFO: No Floating Point Exceptions have been reported
- 1,356,316,269 cycles # 1.758 GHz
- 2,162,893,262 instructions # 1.59 insn per cycle
- 0.772192163 seconds time elapsed
+ 1,352,613,897 cycles # 1.747 GHz
+ 2,162,405,721 instructions # 1.60 insn per cycle
+ 0.774947088 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3479) (512y: 47) (512z:79330)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
@@ -228,8 +228,8 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 9.892981e-03
Avg ME (F77/C++) = 9.8929811982676284E-003
Relative difference = 2.004124217057488e-08
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt
index 51152ded1c..8e4037314e 100644
--- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt
+++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd0_bridge.txt
@@ -1,5 +1,5 @@
-Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
+Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
BACKEND=cpp512y (was cppauto)
OMPFLAGS=
FPTYPE='d'
@@ -21,11 +21,11 @@ make: Nothing to be done for 'all'.
make: Nothing to be done for 'all'.
-DATE: 2024-09-01_23:47:09
+DATE: 2024-09-15_11:50:44
On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 --bridge OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 --bridge OMP=
WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -34,17 +34,17 @@ WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp
Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 6.301300e+02 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.306154e+02 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.306154e+02 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 6.261572e+02 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.268191e+02 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.268191e+02 ) sec^-1
MeanMatrixElemValue = ( 1.187093e-05 +- 9.825663e-06 ) GeV^-6
-TOTAL : 1.672870 sec
+TOTAL : 1.738260 sec
INFO: No Floating Point Exceptions have been reported
INFO: No Floating Point Exceptions have been reported
- 5,721,818,888 cycles # 2.961 GHz
- 12,395,752,294 instructions # 2.17 insn per cycle
- 1.988963755 seconds time elapsed
-runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge
+ 5,820,188,860 cycles # 2.913 GHz
+ 12,502,480,728 instructions # 2.15 insn per cycle
+ 2.056507800 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 --bridge
WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
WARNING! Instantiate device Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gputhreads=256)
@@ -52,7 +52,7 @@ WARNING! Set grid in Bridge (nevt=256, gpublocks=1, gputhreads=256, gpublocks*gp
==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
.........................................................................
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge OMP=
WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -61,18 +61,18 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks
Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 2.121175e+04 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.132575e+04 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.132575e+04 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.148842e+04 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.160493e+04 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.160493e+04 ) sec^-1
MeanMatrixElemValue = ( 1.856440e-04 +- 8.331091e-05 ) GeV^-6
-TOTAL : 2.019350 sec
+TOTAL : 2.045649 sec
INFO: No Floating Point Exceptions have been reported
INFO: No Floating Point Exceptions have been reported
- 6,564,670,149 cycles # 2.877 GHz
- 14,591,387,808 instructions # 2.22 insn per cycle
- 2.337966336 seconds time elapsed
+ 6,753,066,902 cycles # 2.917 GHz
+ 14,813,097,918 instructions # 2.19 insn per cycle
+ 2.374262766 seconds time elapsed
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/runTest_cuda.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
@@ -80,35 +80,35 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2
Avg ME (C++/GPU) = 9.849635e-03
Avg ME (F77/GPU) = 9.8712451931260159E-003
Relative difference = 0.0021940095370046923
OK (relative difference <= 5E-3)
=========================================================================
-Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe
+Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd0/check_hip.exe
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
WARNING! Instantiate host Bridge (nevt=256)
Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME] (23) = ( 8.683425e+01 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.683681e+01 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 8.683681e+01 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 8.502637e+01 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.502889e+01 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 8.502889e+01 ) sec^-1
MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6
-TOTAL : 6.082069 sec
+TOTAL : 6.211468 sec
INFO: No Floating Point Exceptions have been reported
INFO: No Floating Point Exceptions have been reported
- 18,173,296,870 cycles # 2.987 GHz
- 53,912,583,820 instructions # 2.97 insn per cycle
- 6.086323477 seconds time elapsed
+ 18,143,644,137 cycles # 2.921 GHz
+ 53,909,939,321 instructions # 2.97 insn per cycle
+ 6.215559174 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4:20141) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -116,33 +116,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 Avg ME (F77/C++) = 9.8479612087551509E-003 Relative difference = 2.119780432912131e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.446030e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.446471e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.446471e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.339632e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.340031e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.340031e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.534197 sec +TOTAL : 1.583158 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 4,592,088,424 cycles # 2.986 GHz - 13,809,471,792 instructions # 3.01 insn per cycle - 1.538429740 seconds time elapsed + 4,644,642,721 cycles # 2.928 GHz + 13,808,855,992 instructions # 2.97 insn per cycle + 1.587116749 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:97016) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -150,33 +150,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847955e-03 Avg ME (F77/C++) = 9.8479546896367235E-003 Relative difference = 3.1515505172940424e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.989257e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.991152e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.991152e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.786207e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.787843e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.787843e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.757797 sec +TOTAL : 0.780530 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 2,125,674,277 cycles # 2.793 GHz - 4,839,269,663 instructions # 2.28 insn per cycle - 0.762099515 seconds time elapsed + 2,130,238,055 cycles # 2.718 GHz + 4,838,587,482 instructions # 2.27 insn per cycle + 0.784611119 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85494) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -184,33 +184,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 Avg ME (F77/C++) = 9.8929728161091246E-003 Relative difference = 1.8588029579156084e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.782488e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.784687e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.784687e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.698223e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.700507e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.700507e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.681843 sec +TOTAL : 0.688171 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 1,884,280,976 cycles # 2.749 GHz - 4,293,258,485 instructions # 2.28 insn per cycle - 0.686156926 seconds time elapsed + 1,885,276,472 cycles # 2.726 GHz + 4,293,094,440 instructions # 2.28 insn per cycle + 0.692122848 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81185) (512y: 44) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -218,33 +218,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 Avg ME (F77/C++) = 9.8929728161091246E-003 Relative difference = 1.8588029579156084e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=256) Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.037894e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.040203e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.040203e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.810636e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.812720e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.812720e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.753383 sec +TOTAL : 0.777981 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 1,356,538,183 cycles # 1.791 GHz - 2,164,775,306 instructions # 1.60 insn per cycle - 0.757910726 seconds time elapsed + 1,355,130,660 cycles # 1.735 GHz + 2,164,600,762 instructions # 1.60 insn per cycle + 0.782043846 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3479) (512y: 47) (512z:79330) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -252,8 +252,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892981e-03 Avg ME (F77/C++) = 9.8929811982676284E-003 Relative difference = 2.004124217057488e-08 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt index 65b406fa05..113bcaacf7 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_f_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -21,44 +21,44 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. 
-DATE: 2024-09-01_23:27:06 +DATE: 2024-09-15_11:24:20 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.191607e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.192340e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.192663e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.202287e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.203031e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.203251e+02 ) sec^-1 MeanMatrixElemValue = ( 1.186984e-05 +- 9.824899e-06 ) GeV^-6 -TOTAL : 1.742189 sec +TOTAL : 1.759878 sec INFO: No Floating Point Exceptions have been reported - 5,932,372,302 cycles # 2.963 GHz - 12,508,103,447 instructions # 2.11 insn per cycle - 2.058857713 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1 + 5,921,315,429 cycles # 2.907 GHz + 12,451,469,321 instructions # 2.10 insn per cycle + 2.095417433 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.133592e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.134143e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.134218e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.113173e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.113784e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.113870e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856829e-04 +- 8.333437e-05 ) GeV^-6 -TOTAL : 2.056045 sec +TOTAL : 2.088774 sec INFO: No Floating Point Exceptions have been reported - 6,889,881,075 cycles # 2.974 GHz - 14,292,406,169 instructions # 2.07 insn per cycle - 2.373717598 seconds time elapsed + 6,829,574,271 cycles # 2.905 GHz + 14,898,722,914 instructions # 2.18 insn per cycle + 2.410171422 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -66,33 +66,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 9.849635e-03 Avg ME (F77/GPU) = 9.8712451931260107E-003 Relative difference = 0.0021940095370041636 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 8.764150e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.764424e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.764424e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.526309e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.526569e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.526569e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825040e-06 ) GeV^-6 -TOTAL : 6.026782 sec +TOTAL : 6.192756 sec INFO: No Floating Point Exceptions have been reported - 18,051,302,313 cycles # 2.994 GHz - 53,897,977,148 instructions # 2.99 insn per cycle - 6.030777768 seconds time elapsed + 18,135,421,902 cycles # 2.927 GHz + 53,892,650,631 instructions # 2.97 insn per cycle + 6.196840431 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:20141) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -100,31 +100,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847961e-03 Avg ME (F77/C++) = 9.8479612087572898E-003 Relative difference = 2.1198021522715588e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.471197e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.471611e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.471611e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.396709e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.397124e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.397124e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187013e-05 +- 9.825037e-06 ) GeV^-6 -TOTAL : 1.522952 sec +TOTAL : 1.556067 sec INFO: No Floating Point Exceptions have been reported - 4,568,290,742 cycles # 2.993 GHz - 13,800,705,057 instructions # 3.02 insn per cycle - 1.527092884 seconds time elapsed + 4,573,398,855 cycles # 2.934 GHz + 13,800,378,388 instructions # 3.02 insn per cycle + 1.559827589 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:96651) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -132,31 +132,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.847955e-03 Avg ME (F77/C++) = 9.8479546896065809E-003 Relative difference = 3.151856596628469e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.916676e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.918574e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.918574e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.651495e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.653049e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.653049e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.765094 sec +TOTAL : 0.795401 sec INFO: No Floating Point Exceptions have been reported - 2,149,872,306 cycles # 2.797 GHz - 4,840,917,798 instructions # 2.25 insn per cycle - 0.769224783 seconds time elapsed + 2,148,860,867 cycles # 2.691 GHz + 4,840,602,339 instructions # 2.25 insn per cycle + 0.799229981 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:85884) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -164,31 +164,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 Avg ME (F77/C++) = 9.8929728161091923E-003 Relative difference = 1.85880227405429e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 7.862816e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.864916e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.864916e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.688407e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.690576e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.690576e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826767e-06 ) GeV^-6 -TOTAL : 0.673473 sec +TOTAL : 0.688541 sec INFO: No Floating Point Exceptions have been reported - 1,891,619,024 cycles # 2.794 GHz - 4,294,912,545 instructions # 2.27 insn per cycle - 0.677525045 seconds time elapsed + 1,890,706,185 cycles # 2.733 GHz + 4,294,394,779 instructions # 2.27 insn per cycle + 0.692328039 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:81725) (512y: 24) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -196,31 +196,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892973e-03 Avg ME (F77/C++) = 9.8929728161091923E-003 Relative difference = 1.85880227405429e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.805387e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.807811e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.807811e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.826093e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.828148e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.828148e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187188e-05 +- 9.826771e-06 ) GeV^-6 -TOTAL : 0.778957 sec +TOTAL : 0.775828 sec INFO: No Floating Point Exceptions have been reported - 1,357,861,459 cycles # 1.735 GHz - 2,170,288,189 instructions # 1.60 insn per cycle - 0.783205677 seconds time elapsed + 1,357,390,482 cycles # 1.742 GHz + 2,169,212,126 instructions # 1.60 insn per cycle + 0.779795742 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4092) (512y: 32) (512z:79551) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -228,8 +228,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.892981e-03 Avg ME (F77/C++) = 9.8929811982957326E-003 Relative difference = 2.0044082998332894e-08 diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt index 30b5071b57..2e59aa2257 100644 --- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -21,44 +21,44 @@ make: Nothing to be done for 'all'. make: Nothing to be done for 'all'. 
-DATE: 2024-09-01_23:28:07 +DATE: 2024-09-15_11:25:23 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.671280e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.671774e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.671942e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.663841e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.664390e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.664590e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 2.179035 sec +TOTAL : 2.203630 sec INFO: No Floating Point Exceptions have been reported - 7,454,613,030 cycles # 2.972 GHz - 15,438,571,901 instructions # 2.07 insn per cycle - 2.564208911 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1 + 7,260,397,959 cycles # 2.866 GHz + 15,031,707,879 instructions # 2.07 insn per cycle + 2.589013700 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.108972e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.109238e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.109268e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.107763e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.108067e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.108098e+04 ) sec^-1 MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6 -TOTAL : 3.409316 sec +TOTAL : 3.442022 sec INFO: No Floating Point Exceptions have been reported - 11,057,666,610 cycles # 2.961 GHz - 25,976,156,930 instructions # 2.35 insn per cycle - 3.792664161 seconds time elapsed + 10,932,120,354 cycles # 2.895 GHz + 24,906,946,249 instructions # 2.28 insn per cycle + 3.831975982 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -66,33 +66,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 9.872263e-03 Avg ME (F77/GPU) = 9.8722599015656498E-003 Relative difference = 3.1385249252060663e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 7.690962e+01 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.691161e+01 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.691161e+01 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.516129e+01 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.516327e+01 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.516327e+01 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6 -TOTAL : 6.867055 sec +TOTAL : 7.025685 sec INFO: No Floating Point Exceptions have been reported - 19,231,108,767 cycles # 2.799 GHz - 54,133,653,277 instructions # 2.81 insn per cycle - 6.871158287 seconds time elapsed + 19,256,305,943 cycles # 2.740 GHz + 54,130,622,749 instructions # 2.81 insn per cycle + 7.029878997 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:32000) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following 
Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -100,31 +100,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722595861831675E-003 Relative difference = 3.457988134687711e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.564157e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.564257e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.564257e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.524890e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.524973e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.524973e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 3.376613 sec +TOTAL : 3.464849 sec INFO: No Floating Point Exceptions have been reported - 9,406,223,070 cycles # 2.783 GHz - 26,189,614,539 instructions # 2.78 insn per cycle - 3.380789677 seconds time elapsed + 9,453,784,509 cycles # 2.726 GHz + 26,186,103,091 instructions # 2.77 insn per cycle + 3.468732831 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:96049) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 } @@ -132,31 +132,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 9.872263e-03 Avg ME (F77/C++) = 9.8722594844308162E-003 Relative difference = 3.5610570575237004e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.630251e+02 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.630699e+02 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.630699e+02 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.508306e+02 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.508754e+02 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.508754e+02 ) sec^-1 MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6 -TOTAL : 1.457110 sec +TOTAL : 1.507182 sec INFO: No Floating Point Exceptions have been reported - 4,072,790,882 cycles # 2.789 GHz - 9,249,316,005 instructions # 2.27 insn per cycle - 1.461211705 seconds time elapsed + 4,099,795,192 cycles # 2.715 GHz + 9,249,955,249 instructions # 2.26 insn per cycle + 1.510975685 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:84390) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
@@ -164,31 +164,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 9.872263e-03
Avg ME (F77/C++) = 9.8722594324461913E-003
Relative difference = 3.613714310412983e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 4.130909e+02 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.131518e+02 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.131518e+02 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 4.116819e+02 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.117442e+02 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.117442e+02 ) sec^-1
MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6
-TOTAL : 1.278845 sec
+TOTAL : 1.284411 sec
INFO: No Floating Point Exceptions have been reported
- 3,501,587,241 cycles # 2.731 GHz
- 8,183,389,621 instructions # 2.34 insn per cycle
- 1.283034636 seconds time elapsed
+ 3,509,716,252 cycles # 2.725 GHz
+ 8,182,475,258 instructions # 2.33 insn per cycle
+ 1.288638878 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:80015) (512y: 79) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
@@ -196,31 +196,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 9.872263e-03
Avg ME (F77/C++) = 9.8722594324461913E-003
Relative difference = 3.613714310412983e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 3.612594e+02 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.613211e+02 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.613211e+02 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 3.462021e+02 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.462537e+02 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.462537e+02 ) sec^-1
MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6
-TOTAL : 1.463667 sec
+TOTAL : 1.527279 sec
INFO: No Floating Point Exceptions have been reported
- 2,655,595,463 cycles # 1.810 GHz
- 4,172,901,879 instructions # 1.57 insn per cycle
- 1.467972309 seconds time elapsed
+ 2,661,319,941 cycles # 1.739 GHz
+ 4,172,569,565 instructions # 1.57 insn per cycle
+ 1.531717386 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2612) (512y: 93) (512z:78910)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
@@ -228,8 +228,8 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 9.872263e-03
Avg ME (F77/C++) = 9.8722594324461913E-003
Relative difference = 3.613714310412983e-07
diff --git a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt
index 55b2618d91..f2e4a2151c 100644
--- a/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_ggttggg_mad/log_ggttggg_mad_m_inl0_hrd1.txt
@@ -1,5 +1,5 @@
-Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
+Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg
BACKEND=cpp512y (was cppauto)
OMPFLAGS=
FPTYPE='d'
@@ -21,44 +21,44 @@ make: Nothing to be done for 'all'.
make: Nothing to be done for 'all'.
-DATE: 2024-09-01_23:29:32
+DATE: 2024-09-15_11:26:49
On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 4.668363e+02 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.668845e+02 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.669031e+02 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 4.668216e+02 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.668742e+02 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.668891e+02 ) sec^-1
MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6
-TOTAL : 2.178975 sec
+TOTAL : 2.204926 sec
INFO: No Floating Point Exceptions have been reported
- 7,457,493,932 cycles # 2.978 GHz
- 15,404,288,605 instructions # 2.07 insn per cycle
- 2.560153423 seconds time elapsed
-runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1
+ 7,354,907,186 cycles # 2.903 GHz
+ 15,835,326,846 instructions # 2.15 insn per cycle
+ 2.589353613 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1
==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
.........................................................................
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGGG_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 1.105339e+04 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.105611e+04 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.105641e+04 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.111109e+04 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.111413e+04 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.111447e+04 ) sec^-1
MeanMatrixElemValue = ( 1.856249e-04 +- 8.329951e-05 ) GeV^-6
-TOTAL : 3.412909 sec
+TOTAL : 3.435805 sec
INFO: No Floating Point Exceptions have been reported
- 11,131,620,298 cycles # 2.977 GHz
- 26,091,817,296 instructions # 2.34 insn per cycle
- 3.795284390 seconds time elapsed
+ 11,002,728,447 cycles # 2.923 GHz
+ 25,822,053,923 instructions # 2.35 insn per cycle
+ 3.822280777 seconds time elapsed
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/runTest_cuda.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/runTest_cuda.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
@@ -66,33 +66,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2
Avg ME (C++/GPU) = 9.872263e-03
Avg ME (F77/GPU) = 9.8722599015656498E-003
Relative difference = 3.1385249252060663e-07
OK (relative difference <= 5E-3)
=========================================================================
-Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/check_hip.exe
+Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.hip_m_inl0_hrd1/check_hip.exe
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME] (23) = ( 7.937184e+01 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.937416e+01 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.937416e+01 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 7.824002e+01 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.824211e+01 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.824211e+01 ) sec^-1
MeanMatrixElemValue = ( 1.187066e-05 +- 9.825549e-06 ) GeV^-6
-TOTAL : 6.645811 sec
+TOTAL : 6.748238 sec
INFO: No Floating Point Exceptions have been reported
- 19,116,090,748 cycles # 2.875 GHz
- 54,157,566,161 instructions # 2.83 insn per cycle
- 6.649761760 seconds time elapsed
+ 19,286,477,225 cycles # 2.857 GHz
+ 54,157,907,603 instructions # 2.81 insn per cycle
+ 6.752432065 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4:32202) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
@@ -100,31 +100,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 9.872263e-03
Avg ME (F77/C++) = 9.8722595861831675E-003
Relative difference = 3.457988134687711e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 1.593746e+02 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.593833e+02 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.593833e+02 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.548001e+02 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.548086e+02 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.548086e+02 ) sec^-1
MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6
-TOTAL : 3.315385 sec
+TOTAL : 3.412561 sec
INFO: No Floating Point Exceptions have been reported
- 9,261,820,614 cycles # 2.791 GHz
- 26,087,601,781 instructions # 2.82 insn per cycle
- 3.319522422 seconds time elapsed
+ 9,302,368,855 cycles # 2.723 GHz
+ 26,085,336,117 instructions # 2.80 insn per cycle
+ 3.416771061 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4:95938) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
@@ -132,31 +132,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 9.872263e-03
Avg ME (F77/C++) = 9.8722594844308162E-003
Relative difference = 3.5610570575237004e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 3.574845e+02 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.575287e+02 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.575287e+02 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 3.533570e+02 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.534051e+02 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.534051e+02 ) sec^-1
MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6
-TOTAL : 1.477762 sec
+TOTAL : 1.496264 sec
INFO: No Floating Point Exceptions have been reported
- 4,086,825,644 cycles # 2.759 GHz
- 9,213,646,236 instructions # 2.25 insn per cycle
- 1.481734504 seconds time elapsed
+ 4,086,923,304 cycles # 2.726 GHz
+ 9,212,952,806 instructions # 2.25 insn per cycle
+ 1.500090267 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:83864) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
@@ -164,31 +164,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 9.872263e-03
Avg ME (F77/C++) = 9.8722594324461913E-003
Relative difference = 3.613714310412983e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 4.245121e+02 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.245740e+02 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.245740e+02 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 4.068352e+02 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.068931e+02 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.068931e+02 ) sec^-1
MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6
-TOTAL : 1.245877 sec
+TOTAL : 1.299365 sec
INFO: No Floating Point Exceptions have been reported
- 3,507,145,990 cycles # 2.807 GHz
- 8,168,649,004 instructions # 2.33 insn per cycle
- 1.249959812 seconds time elapsed
+ 3,513,960,907 cycles # 2.698 GHz
+ 8,167,668,326 instructions # 2.32 insn per cycle
+ 1.303235401 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2:79421) (512y: 229) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
@@ -196,31 +196,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 9.872263e-03
Avg ME (F77/C++) = 9.8722594324461913E-003
Relative difference = 3.613714310412983e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GG_TTXGGG_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 3.628651e+02 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.629248e+02 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.629248e+02 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 3.521239e+02 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.521794e+02 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.521794e+02 ) sec^-1
MeanMatrixElemValue = ( 1.187066e-05 +- 9.825548e-06 ) GeV^-6
-TOTAL : 1.460034 sec
+TOTAL : 1.500932 sec
INFO: No Floating Point Exceptions have been reported
- 2,627,414,624 cycles # 1.795 GHz
- 4,168,572,006 instructions # 1.59 insn per cycle
- 1.464415967 seconds time elapsed
+ 2,617,549,535 cycles # 1.740 GHz
+ 4,166,941,618 instructions # 1.59 insn per cycle
+ 1.504880250 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1876) (512y: 175) (512z:78884)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 1240 channels { no-multichannel : 512 }
@@ -228,8 +228,8 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 1240 channels { 1 : 32, 2 : 32, 4 : 32, 5 : 32, 7 : 32, 8 : 32, 14 : 32, 15 : 32, 16 : 32, 18 : 32, 19 : 32, 20 : 32, 22 : 32, 23 : 32, 24 : 32, 26 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gg_ttggg.mad/SubProcesses/P1_gg_ttxggg/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 9.872263e-03
Avg ME (F77/C++) = 9.8722594324461913E-003
Relative difference = 3.613714310412983e-07
diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt
index 752f3e4e22..73af5e5b3a 100644
--- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0.txt
@@ -1,5 +1,5 @@
-Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux
+Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux
BACKEND=cpp512y (was cppauto)
OMPFLAGS=
FPTYPE='d'
@@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h
make: Nothing to be done for 'gtestlibs'.
make USEBUILDDIR=1 BACKEND=cuda
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make USEBUILDDIR=1 BACKEND=cppnone
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make USEBUILDDIR=1 BACKEND=cppsse4
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make USEBUILDDIR=1 BACKEND=cppavx2
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make USEBUILDDIR=1 BACKEND=cpp512z
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
-DATE: 2024-09-01_23:21:53
+DATE: 2024-09-15_11:19:01
On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 2.031687e+07 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.003161e+07 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.104462e+07 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.740481e+07 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.765338e+07 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.886154e+07 ) sec^-1
MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2
-TOTAL : 0.455310 sec
+TOTAL : 0.459950 sec
INFO: No Floating Point Exceptions have been reported
- 1,914,069,990 cycles # 2.849 GHz
- 2,713,664,499 instructions # 1.42 insn per cycle
- 0.728378450 seconds time elapsed
-runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1
+ 1,934,018,539 cycles # 2.861 GHz
+ 2,739,518,446 instructions # 1.42 insn per cycle
+ 0.734446139 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1
==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
.........................................................................
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 3.192130e+07 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.579973e+07 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.785909e+07 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.975676e+07 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.474629e+07 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.695966e+07 ) sec^-1
MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2
-TOTAL : 0.533178 sec
+TOTAL : 0.539678 sec
INFO: No Floating Point Exceptions have been reported
- 2,236,133,262 cycles # 2.909 GHz
- 3,226,380,886 instructions # 1.44 insn per cycle
- 0.827389063 seconds time elapsed
+ 2,257,806,163 cycles # 2.877 GHz
+ 3,239,125,642 instructions # 1.43 insn per cycle
+ 0.841027050 seconds time elapsed
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
@@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2
Avg ME (C++/GPU) = 1.424749e-01
Avg ME (F77/GPU) = 0.14247482467490466
Relative difference = 5.286902838873106e-07
OK (relative difference <= 5E-3)
=========================================================================
-Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe
+Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME] (23) = ( 1.068548e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.091206e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.091206e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.056428e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.078475e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.078475e+05 ) sec^-1
MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2
-TOTAL : 1.552747 sec
+TOTAL : 1.570596 sec
INFO: No Floating Point Exceptions have been reported
- 4,620,087,308 cycles # 2.969 GHz
- 13,191,209,906 instructions # 2.86 insn per cycle
- 1.556877489 seconds time elapsed
+ 4,620,202,435 cycles # 2.935 GHz
+ 13,190,173,768 instructions # 2.85 insn per cycle
+ 1.574765138 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 707) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
@@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.424749e-01
Avg ME (F77/C++) = 0.14247482467499481
Relative difference = 5.286896511435107e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 1.880560e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.951101e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.951101e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.870844e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.942105e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.942105e+05 ) sec^-1
MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2
-TOTAL : 0.889978 sec
+TOTAL : 0.895190 sec
INFO: No Floating Point Exceptions have been reported
- 2,637,556,758 cycles # 2.952 GHz
- 7,554,793,042 instructions # 2.86 insn per cycle
- 0.894227958 seconds time elapsed
+ 2,640,894,010 cycles # 2.940 GHz
+ 7,556,112,587 instructions # 2.86 insn per cycle
+ 0.899078617 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 3099) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
@@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.424749e-01
Avg ME (F77/C++) = 0.14247482467499475
Relative difference = 5.286896515331313e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 3.229284e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.441159e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.441159e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 3.155420e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.359383e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.359383e+05 ) sec^-1
MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2
-TOTAL : 0.526334 sec
+TOTAL : 0.539606 sec
INFO: No Floating Point Exceptions have been reported
- 1,488,988,294 cycles # 2.810 GHz
- 3,159,434,723 instructions # 2.12 insn per cycle
- 0.530437516 seconds time elapsed
+ 1,490,717,557 cycles # 2.746 GHz
+ 3,161,146,919 instructions # 2.12 insn per cycle
+ 0.543466540 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2991) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
@@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.424749e-01
Avg ME (F77/C++) = 0.14247482467492589
Relative difference = 5.286901348574438e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 3.574498e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.829667e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.829667e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 3.514709e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.763624e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.763624e+05 ) sec^-1
MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2
-TOTAL : 0.477558 sec
+TOTAL : 0.485581 sec
INFO: No Floating Point Exceptions have been reported
- 1,345,056,587 cycles # 2.796 GHz
- 3,014,576,152 instructions # 2.24 insn per cycle
- 0.481628256 seconds time elapsed
+ 1,345,992,067 cycles # 2.752 GHz
+ 3,013,895,719 instructions # 2.24 insn per cycle
+ 0.489750963 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2752) (512y: 104) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
@@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.424749e-01
Avg ME (F77/C++) = 0.14247482467492589
Relative difference = 5.286901348574438e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 2.397723e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.510771e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.510771e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.329309e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.438411e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.438411e+05 ) sec^-1
MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2
-TOTAL : 0.704790 sec
+TOTAL : 0.725847 sec
INFO: No Floating Point Exceptions have been reported
- 1,325,976,316 cycles # 1.872 GHz
- 1,962,614,842 instructions # 1.48 insn per cycle
- 0.709099149 seconds time elapsed
+ 1,326,647,346 cycles # 1.820 GHz
+ 1,963,906,161 instructions # 1.48 insn per cycle
+ 0.729744934 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1379) (512y: 106) (512z: 2218)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
@@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.424749e-01
Avg ME (F77/C++) = 0.14247482467492589
Relative difference = 5.286901348574438e-07
diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt
index e205a96493..87049bf6bc 100644
--- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt
+++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd0_bridge.txt
@@ -1,5 +1,5 @@
-Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux
+Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux
BACKEND=cpp512y (was cppauto)
OMPFLAGS=
FPTYPE='d'
@@ -11,40 +11,40 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h
make: Nothing to be done for 'gtestlibs'.
make USEBUILDDIR=1 BACKEND=cuda
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make USEBUILDDIR=1 BACKEND=cppnone
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make USEBUILDDIR=1 BACKEND=cppsse4
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make USEBUILDDIR=1 BACKEND=cppavx2
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make USEBUILDDIR=1 BACKEND=cpp512z
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
-DATE: 2024-09-01_23:44:16
+DATE: 2024-09-15_11:47:47
On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP=
WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -53,17 +53,17 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks
Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 3.345310e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.664571e+07 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.664571e+07 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 3.302816e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.642797e+07 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.642797e+07 ) sec^-1
MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2
-TOTAL : 0.479051 sec
+TOTAL : 0.484911 sec
INFO: No Floating Point Exceptions have been reported
INFO: No Floating Point Exceptions have been reported
- 2,024,905,494 cycles # 2.924 GHz
- 3,010,761,757 instructions # 1.49 insn per cycle
- 0.749424438 seconds time elapsed
-runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge
+ 1,993,081,726 cycles # 2.857 GHz
+ 2,967,605,428 instructions # 1.49 insn per cycle
+ 0.755486323 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge
WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384)
@@ -71,7 +71,7 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks
==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
.........................................................................
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP=
WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost
WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
@@ -80,18 +80,18 @@ WARNING! Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo
Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CUD:DBL+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 3.221274e+06 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.154965e+07 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.154965e+07 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 3.256871e+06 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.326938e+07 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.326938e+07 ) sec^-1
MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2
-TOTAL : 0.753201 sec
+TOTAL : 0.759153 sec
INFO: No Floating Point Exceptions have been reported
INFO: No Floating Point Exceptions have been reported
- 2,928,569,908 cycles # 2.912 GHz
- 4,501,106,831 instructions # 1.54 insn per cycle
- 1.062426776 seconds time elapsed
+ 2,900,800,476 cycles # 2.878 GHz
+ 4,476,324,954 instructions # 1.54 insn per cycle
+ 1.066391830 seconds time elapsed
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/runTest_cuda.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
@@ -99,35 +99,35 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2
Avg ME (C++/GPU) = 1.424749e-01
Avg ME (F77/GPU) = 0.14247482467490466
Relative difference = 5.286902838873106e-07
OK (relative difference <= 5E-3)
=========================================================================
-Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe
+Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd0/check_hip.exe
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
WARNING! Instantiate host Bridge (nevt=16384)
Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME] (23) = ( 1.079441e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.102836e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.102836e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.051618e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.074674e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.074674e+05 ) sec^-1
MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2
-TOTAL : 1.542749 sec
+TOTAL : 1.585397 sec
INFO: No Floating Point Exceptions have been reported
INFO: No Floating Point Exceptions have been reported
- 4,649,053,239 cycles # 3.007 GHz
- 13,196,358,277 instructions # 2.84 insn per cycle
- 1.547064126 seconds time elapsed
+ 4,659,791,475 cycles # 2.933 GHz
+ 13,199,729,048 instructions # 2.83 insn per cycle
+ 1.589552076 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 707) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
@@ -135,33 +135,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.424749e-01
Avg ME (F77/C++) = 0.14247482467499481
Relative difference = 5.286896511435107e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
WARNING! Instantiate host Bridge (nevt=16384)
Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 1.903842e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.977293e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.977293e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.863646e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.935223e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.935223e+05 ) sec^-1
MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2
-TOTAL : 0.886250 sec
+TOTAL : 0.905908 sec
INFO: No Floating Point Exceptions have been reported
INFO: No Floating Point Exceptions have been reported
- 2,671,400,020 cycles # 3.001 GHz
- 7,604,703,234 instructions # 2.85 insn per cycle
- 0.890720613 seconds time elapsed
+ 2,672,075,267 cycles # 2.939 GHz
+ 7,605,973,490 instructions # 2.85 insn per cycle
+ 0.909977972 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 3099) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
@@ -169,33 +169,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.424749e-01
Avg ME (F77/C++) = 0.14247482467499475
Relative difference = 5.286896515331313e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
WARNING! Instantiate host Bridge (nevt=16384)
Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 3.228849e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.437424e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.437424e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 3.113398e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.317707e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.317707e+05 ) sec^-1
MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2
-TOTAL : 0.532771 sec
+TOTAL : 0.554094 sec
INFO: No Floating Point Exceptions have been reported
INFO: No Floating Point Exceptions have been reported
- 1,514,010,227 cycles # 2.824 GHz
- 3,208,515,118 instructions # 2.12 insn per cycle
- 0.537156979 seconds time elapsed
+ 1,524,354,103 cycles # 2.734 GHz
+ 3,211,905,393 instructions # 2.11 insn per cycle
+ 0.558166519 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2991) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
@@ -203,33 +203,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.424749e-01
Avg ME (F77/C++) = 0.14247482467492589
Relative difference = 5.286901348574438e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
WARNING! Instantiate host Bridge (nevt=16384)
Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 3.428264e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.678035e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.678035e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 3.488860e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.737446e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.737446e+05 ) sec^-1
MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2
-TOTAL : 0.505454 sec
+TOTAL : 0.497012 sec
INFO: No Floating Point Exceptions have been reported
INFO: No Floating Point Exceptions have been reported
- 1,390,457,987 cycles # 2.731 GHz
- 3,065,685,884 instructions # 2.20 insn per cycle
- 0.509808717 seconds time elapsed
+ 1,381,887,844 cycles # 2.761 GHz
+ 3,066,710,334 instructions # 2.22 insn per cycle
+ 0.501143809 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2752) (512y: 104) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
@@ -237,33 +237,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.424749e-01
Avg ME (F77/C++) = 0.14247482467492589
Relative difference = 5.286901348574438e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
WARNING! Instantiate host Bridge (nevt=16384)
Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 2.404789e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.521630e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.521630e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.170464e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.268423e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.268423e+05 ) sec^-1
MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2
-TOTAL : 0.709220 sec
+TOTAL : 0.785429 sec
INFO: No Floating Point Exceptions have been reported
INFO: No Floating Point Exceptions have been reported
- 1,357,049,699 cycles # 1.904 GHz
- 2,000,123,529 instructions # 1.47 insn per cycle
- 0.713542278 seconds time elapsed
+ 1,369,203,746 cycles # 1.799 GHz
+ 2,005,266,999 instructions # 1.46 insn per cycle
+ 0.789533436 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1379) (512y: 106) (512z: 2218)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
@@ -271,8 +271,8 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.424749e-01
Avg ME (F77/C++) = 0.14247482467492589
Relative difference = 5.286901348574438e-07
diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt
index f94a52ab72..f184fc3b5e 100644
--- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_d_inl0_hrd1.txt
@@ -1,5 +1,5 @@
-Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux
+Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux
BACKEND=cpp512y (was cppauto)
OMPFLAGS=
FPTYPE='d'
@@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h
make: Nothing to be done for 'gtestlibs'.
make USEBUILDDIR=1 BACKEND=cuda
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make USEBUILDDIR=1 BACKEND=cppnone
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make USEBUILDDIR=1 BACKEND=cppsse4
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make USEBUILDDIR=1 BACKEND=cppavx2
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make USEBUILDDIR=1 BACKEND=cpp512z
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
-DATE: 2024-09-01_23:22:07
+DATE: 2024-09-15_11:19:15
On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 1.978981e+07 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.910532e+07 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.009115e+07 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.732857e+07 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.764454e+07 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.875095e+07 ) sec^-1
MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2
-TOTAL : 0.453993 sec
+TOTAL : 0.459022 sec
INFO: No Floating Point Exceptions have been reported
- 1,944,810,772 cycles # 2.908 GHz
- 2,740,865,498 instructions # 1.41 insn per cycle
- 0.726972885 seconds time elapsed
-runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1
+ 1,935,515,541 cycles # 2.868 GHz
+ 2,740,568,582 instructions # 1.42 insn per cycle
+ 0.732282850 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1
==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
.........................................................................
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 3.154104e+07 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.465948e+07 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.663513e+07 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.938986e+07 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.388894e+07 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.605968e+07 ) sec^-1
MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2
-TOTAL : 0.530243 sec
+TOTAL : 0.543627 sec
INFO: No Floating Point Exceptions have been reported
- 2,242,321,005 cycles # 2.927 GHz
- 3,256,187,861 instructions # 1.45 insn per cycle
- 0.823764416 seconds time elapsed
+ 2,240,911,931 cycles # 2.849 GHz
+ 3,134,508,527 instructions # 1.40 insn per cycle
+ 0.843804985 seconds time elapsed
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/runTest_cuda.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/runTest_cuda.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
@@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2
Avg ME (C++/GPU) = 1.424749e-01
Avg ME (F77/GPU) = 0.14247482467490466
Relative difference = 5.286902838873106e-07
OK (relative difference <= 5E-3)
=========================================================================
-Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe
+Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_d_inl0_hrd1/check_hip.exe
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME] (23) = ( 1.081605e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.104353e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.104353e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.033389e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.055879e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.055879e+05 ) sec^-1
MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2
-TOTAL : 1.533744 sec
+TOTAL : 1.606028 sec
INFO: No Floating Point Exceptions have been reported
- 4,619,608,835 cycles # 3.005 GHz
- 13,179,787,994 instructions # 2.85 insn per cycle
- 1.537829933 seconds time elapsed
+ 4,632,143,331 cycles # 2.878 GHz
+ 13,180,119,009 instructions # 2.85 insn per cycle
+ 1.610268805 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 692) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
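The recurring "OK (relative difference <= 5E-3)" verdicts above come from cross-checking the average matrix element computed by two backends (C++ or CUDA against Fortran). The sketch below is illustrative only, not the repository's actual code: the helper name compareAvgME and the zero-denominator guard are assumptions; the 5E-3 threshold and the input values are copied from the log blocks above.

#include <algorithm>
#include <cmath>
#include <cstdio>

// Hypothetical helper: compare two backends' average matrix elements
// against the relative tolerance reported in the logs.
bool compareAvgME( double avgME1, double avgME2, double relTolerance = 5e-3 )
{
  // Relative difference, guarded against a zero reference value (assumption)
  const double relDiff = std::abs( avgME1 - avgME2 ) / std::max( std::abs( avgME1 ), 1e-300 );
  std::printf( "Relative difference = %.15e\n", relDiff );
  std::puts( relDiff <= relTolerance ? "OK (relative difference <= 5E-3)"
                                     : "ERROR (relative difference > 5E-3)" );
  return relDiff <= relTolerance;
}

int main()
{
  // Values copied from the C++/GPU vs F77/GPU comparison above
  return compareAvgME( 1.424749e-01, 0.14247482467490466 ) ? 0 : 1;
}

With these inputs the computed relative difference is ~5.29e-07, matching the logged value, so the check passes with a wide margin.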
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
@@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.424749e-01
Avg ME (F77/C++) = 0.14247482467499481
Relative difference = 5.286896511435107e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 1.883806e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.954980e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.954980e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.829571e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.900649e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.900649e+05 ) sec^-1
MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2
-TOTAL : 0.888511 sec
+TOTAL : 0.915734 sec
INFO: No Floating Point Exceptions have been reported
- 2,638,583,481 cycles # 2.959 GHz
- 7,553,274,738 instructions # 2.86 insn per cycle
- 0.892657592 seconds time elapsed
+ 2,643,771,941 cycles # 2.877 GHz
+ 7,554,150,292 instructions # 2.86 insn per cycle
+ 0.919868185 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 3093) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
@@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.424749e-01
Avg ME (F77/C++) = 0.14247482467499475
Relative difference = 5.286896515331313e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 3.132913e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.333320e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.333320e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 3.046256e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.248866e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.248866e+05 ) sec^-1
MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2
-TOTAL : 0.542044 sec
+TOTAL : 0.558915 sec
INFO: No Floating Point Exceptions have been reported
- 1,493,314,483 cycles # 2.737 GHz
- 3,158,681,228 instructions # 2.12 insn per cycle
- 0.546207198 seconds time elapsed
+ 1,500,616,577 cycles # 2.669 GHz
+ 3,161,167,766 instructions # 2.11 insn per cycle
+ 0.563154837 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2976) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
@@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.424749e-01
Avg ME (F77/C++) = 0.14247482467492589
Relative difference = 5.286901348574438e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 3.575941e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.830718e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.830718e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 3.429175e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.674618e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.674618e+05 ) sec^-1
MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2
-TOTAL : 0.477166 sec
+TOTAL : 0.498371 sec
INFO: No Floating Point Exceptions have been reported
- 1,346,749,124 cycles # 2.802 GHz
- 3,011,101,009 instructions # 2.24 insn per cycle
- 0.481258455 seconds time elapsed
+ 1,352,614,614 cycles # 2.696 GHz
+ 3,013,058,203 instructions # 2.23 insn per cycle
+ 0.502370936 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2726) (512y: 104) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
@@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.424749e-01
Avg ME (F77/C++) = 0.14247482467492589
Relative difference = 5.286901348574438e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 2.401586e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.515098e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.515098e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.263352e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.370712e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.370712e+05 ) sec^-1
MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2
-TOTAL : 0.703522 sec
+TOTAL : 0.746831 sec
INFO: No Floating Point Exceptions have been reported
- 1,323,479,582 cycles # 1.872 GHz
- 1,960,882,007 instructions # 1.48 insn per cycle
- 0.707682489 seconds time elapsed
+ 1,330,812,654 cycles # 1.774 GHz
+ 1,962,138,478 instructions # 1.47 insn per cycle
+ 0.751010006 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1356) (512y: 106) (512z: 2218)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
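The backend tags none/sse4/avx2/512y/512z that label the build directories above map onto the "Internal loops fptype_sv" widths reported in each block: scalar, VECTOR[2], VECTOR[4], VECTOR[4] and VECTOR[8] in double precision (the float builds below double each width). A minimal sketch, assuming GCC/Clang vector extensions, of how such fixed-width SIMD types can be declared; this is illustrative only, not the actual madgraph4gpu typedefs:

// Illustrative only: one double-precision SIMD type per backend tag,
// using the GCC/Clang vector_size extension.
typedef double fptype;
typedef fptype fptype_v2 __attribute__( ( vector_size( 16 ) ) ); // 'sse4': VECTOR[2], 128bit
typedef fptype fptype_v4 __attribute__( ( vector_size( 32 ) ) ); // 'avx2'/'512y': VECTOR[4], 256bit
typedef fptype fptype_v8 __attribute__( ( vector_size( 64 ) ) ); // '512z': VECTOR[8], 512bit

int main()
{
  fptype_v8 a = { 0, 1, 2, 3, 4, 5, 6, 7 };
  fptype_v8 b = a + a; // one 8-lane SIMD add replaces eight scalar adds
  return (int)b[7];    // 14
}

The '512y' and 'avx2' builds share the 4-lane type; they differ in the instruction set the compiler is allowed to emit for it, which is why the "=Symbols in CPPProcess_cpp.o=" lines distinguish avx2 from 512y instruction counts.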
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
@@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.424749e-01
Avg ME (F77/C++) = 0.14247482467492589
Relative difference = 5.286901348574438e-07
diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt
index 539edfef06..9c9085f218 100644
--- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0.txt
@@ -1,5 +1,5 @@
-Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux
+Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux
BACKEND=cpp512y (was cppauto)
OMPFLAGS=
FPTYPE='d'
@@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h
make: Nothing to be done for 'gtestlibs'.
make USEBUILDDIR=1 BACKEND=cuda
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make USEBUILDDIR=1 BACKEND=cppnone
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make USEBUILDDIR=1 BACKEND=cppsse4
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make USEBUILDDIR=1 BACKEND=cppavx2
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make USEBUILDDIR=1 BACKEND=cpp512z
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux'
-DATE: 2024-09-01_23:22:20
+DATE: 2024-09-15_11:19:29
On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 3.045407e+07 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.221377e+07 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.365082e+07 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.616183e+07 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.859144e+07 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.009356e+07 ) sec^-1
MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2
-TOTAL : 0.447666 sec
+TOTAL : 0.455344 sec
INFO: No Floating Point Exceptions have been reported
- 1,945,143,368 cycles # 2.906 GHz
- 2,736,215,762 instructions # 1.41 insn per cycle
- 0.726464456 seconds time elapsed
-runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1
+ 1,903,769,704 cycles # 2.832 GHz
+ 2,695,127,426 instructions # 1.42 insn per cycle
+ 0.728806465 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1
==PROF== Profiling "sigmaKin": launch__registers_per_thread 169
==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
.........................................................................
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 5.771213e+07 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.600955e+07 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.919338e+07 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 5.292895e+07 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.269503e+07 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.615665e+07 ) sec^-1
MeanMatrixElemValue = ( 2.571360e+02 +- 2.114020e+02 ) GeV^-2
-TOTAL : 0.485020 sec
+TOTAL : 0.493521 sec
INFO: No Floating Point Exceptions have been reported
- 2,064,276,099 cycles # 2.909 GHz
- 2,946,314,472 instructions # 1.43 insn per cycle
- 0.768202354 seconds time elapsed
+ 2,090,898,444 cycles # 2.835 GHz
+ 2,942,471,441 instructions # 1.41 insn per cycle
+ 0.794240657 seconds time elapsed
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
@@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2
Avg ME (C++/GPU) = 1.424226e-01
Avg ME (F77/GPU) = 0.14247487904286338
Relative difference = 0.0003670698531228044
OK (relative difference <= 5E-3)
=========================================================================
-Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe
+Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME] (23) = ( 1.119951e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.145461e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.145461e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.085317e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.110333e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.110333e+05 ) sec^-1
MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2
-TOTAL : 1.480791 sec
+TOTAL : 1.528688 sec
INFO: No Floating Point Exceptions have been reported
- 4,405,520,694 cycles # 2.968 GHz
- 12,951,927,506 instructions # 2.94 insn per cycle
- 1.484750852 seconds time elapsed
+ 4,411,922,721 cycles # 2.879 GHz
+ 12,951,312,387 instructions # 2.94 insn per cycle
+ 1.532844163 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 645) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 }
@@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 1.424686e-01
Avg ME (F77/C++) = 0.14246861273719524
Relative difference = 8.940352641194861e-08
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
FP precision = FLOAT (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 2.931213e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.111077e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.111077e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.813599e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.988360e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.988360e+05 ) sec^-1
MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2
-TOTAL : 0.575764 sec
+TOTAL : 0.600071 sec
INFO: No Floating Point Exceptions have been reported
- 1,726,755,289 cycles # 2.983 GHz
- 4,541,905,728 instructions # 2.63 insn per cycle
- 0.579696563 seconds time elapsed
+ 1,729,759,970 cycles # 2.867 GHz
+ 4,541,750,353 instructions # 2.63 insn per cycle
+ 0.604044137 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 3627) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246862329122401 Relative difference = 1.6348320966878032e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.741178e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.447926e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.447926e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.481903e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.160699e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.160699e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.302577 sec +TOTAL : 0.317183 sec INFO: No Floating Point Exceptions have been reported - 858,281,964 cycles # 2.805 GHz - 1,918,005,611 instructions # 2.23 insn per cycle - 0.306598837 seconds time elapsed + 858,921,512 cycles # 2.679 GHz + 1,917,766,555 instructions # 2.23 insn per cycle + 0.321171597 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3580) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247491543012991 Relative difference = 1.0830068962165901e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.186652e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.004123e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.004123e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.857503e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.629025e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.629025e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.282428 sec +TOTAL : 0.298118 sec INFO: No Floating Point Exceptions have been reported - 802,950,150 cycles # 2.808 GHz - 1,834,564,140 instructions # 2.28 insn per cycle - 0.286463360 seconds time elapsed + 804,518,989 cycles # 2.670 GHz + 1,834,610,739 instructions # 2.28 insn per cycle + 0.301964643 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3402) (512y: 22) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247491543012991 Relative difference = 1.0830068962165901e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.633415e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.094139e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.094139e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.365317e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.786659e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.786659e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.373155 sec +TOTAL : 0.395762 sec INFO: No Floating Point Exceptions have been reported - 727,080,964 cycles # 1.931 GHz - 1,308,398,620 instructions # 1.80 insn per cycle - 0.377057439 seconds time elapsed + 728,663,796 cycles # 1.826 GHz + 1,308,267,192 instructions # 1.80 insn per cycle + 0.399787635 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1963) (512y: 26) (512z: 2434) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247491576758442 Relative difference = 1.1066920862943416e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt index 516b3c63cb..f23dffbec1 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd0_bridge.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,40 +11,40 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-01_23:44:29 +DATE: 2024-09-15_11:48:01 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 10 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -53,17 +53,17 @@ WARNING! 
Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.036695e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.480472e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.480472e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.986387e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.435739e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.435739e+07 ) sec^-1 MeanMatrixElemValue = ( 2.017654e+01 +- 1.429183e+01 ) GeV^-2 -TOTAL : 0.460608 sec +TOTAL : 0.462861 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 1,967,208,517 cycles # 2.924 GHz - 2,913,604,143 instructions # 1.48 insn per cycle - 0.729608923 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge + 1,938,742,838 cycles # 2.865 GHz + 2,865,087,008 instructions # 1.48 insn per cycle + 0.732940290 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 --bridge WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost WARNING! Instantiate device Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks*gputhreads=16384) @@ -71,7 +71,7 @@ WARNING! Set grid in Bridge (nevt=16384, gpublocks=64, gputhreads=256, gpublocks ==PROF== Profiling "sigmaKin": launch__registers_per_thread 169 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 --bridge OMP= WARNING! Bridge selected: cannot use RamboDevice, will use RamboHost WARNING! RamboHost selected: cannot use CurandDevice, will use CurandHost INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW @@ -80,18 +80,18 @@ WARNING! 
Set grid in Bridge (nevt=524288, gpublocks=2048, gputhreads=256, gpublo Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURHST+RMBHST+BRDDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.953788e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.785252e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.785252e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.036027e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.082450e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.082450e+07 ) sec^-1 MeanMatrixElemValue = ( 2.609941e+02 +- 2.115589e+02 ) GeV^-2 -TOTAL : 0.631447 sec +TOTAL : 0.632974 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 2,535,639,057 cycles # 2.933 GHz - 3,859,863,698 instructions # 1.52 insn per cycle - 0.922711155 seconds time elapsed + 2,495,936,905 cycles # 2.880 GHz + 3,785,157,902 instructions # 1.52 insn per cycle + 0.923834641 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -99,35 +99,35 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.424226e-01 Avg ME (F77/GPU) = 0.14247487904286338 Relative difference = 0.0003670698531228044 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, 
FE_OVERFLOW WARNING! Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.129048e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.155080e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.155080e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.104272e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.129547e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.129547e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.471706 sec +TOTAL : 1.505088 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 4,420,739,520 cycles # 2.997 GHz - 12,956,497,906 instructions # 2.93 insn per cycle - 1.475936292 seconds time elapsed + 4,422,322,267 cycles # 2.932 GHz + 12,955,751,055 instructions # 2.93 insn per cycle + 1.509164533 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 645) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -135,33 +135,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246861273719524 Relative difference = 8.940352641194861e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.945017e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.127579e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.127579e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.849156e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.028095e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.028095e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 0.577519 sec +TOTAL : 0.596878 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 1,746,510,912 cycles # 3.006 GHz - 4,590,294,549 instructions # 2.63 insn per cycle - 0.581528587 seconds time elapsed + 1,746,639,368 cycles # 2.911 GHz + 4,590,056,426 instructions # 2.63 insn per cycle + 0.600772729 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3627) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -169,33 +169,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246862329122401 Relative difference = 1.6348320966878032e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.790714e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.493578e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.493578e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.482456e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.156915e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.156915e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.304247 sec +TOTAL : 0.321160 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 875,834,497 cycles # 2.846 GHz - 1,955,180,350 instructions # 2.23 insn per cycle - 0.308403282 seconds time elapsed + 875,522,703 cycles # 2.698 GHz + 1,954,476,479 instructions # 2.23 insn per cycle + 0.325091323 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3580) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -203,33 +203,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247491543012991 Relative difference = 1.0830068962165901e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.189465e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.016700e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.016700e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.960780e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.751467e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.751467e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.286303 sec +TOTAL : 0.296758 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 822,678,068 cycles # 2.838 GHz - 1,871,812,412 instructions # 2.28 insn per cycle - 0.290432615 seconds time elapsed + 821,090,022 cycles # 2.738 GHz + 1,871,468,752 instructions # 2.28 insn per cycle + 0.300472695 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3402) (512y: 22) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -237,33 +237,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247491543012991 Relative difference = 1.0830068962165901e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe -p 64 256 10 --bridge OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW WARNING! 
Instantiate host Bridge (nevt=16384) Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+BRDHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.670499e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.135311e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.135311e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.492830e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.932362e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.932362e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.374584 sec +TOTAL : 0.388889 sec INFO: No Floating Point Exceptions have been reported INFO: No Floating Point Exceptions have been reported - 749,472,169 cycles # 1.981 GHz - 1,349,919,039 instructions # 1.80 insn per cycle - 0.378841460 seconds time elapsed + 746,594,357 cycles # 1.904 GHz + 1,349,630,324 instructions # 1.81 insn per cycle + 0.392744433 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1963) (512y: 26) (512z: 2434) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -271,8 +271,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247491576758442 Relative difference = 1.1066920862943416e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt index 5c7c3206a6..e2521e45b2 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_f_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. 
make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-01_23:22:32 +DATE: 2024-09-15_11:19:41 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.996806e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.140865e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.289001e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.639867e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.865475e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.015127e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018174e+01 +- 1.429492e+01 ) GeV^-2 -TOTAL : 0.447667 sec +TOTAL : 0.454979 sec INFO: No Floating Point Exceptions have been reported - 1,939,854,779 cycles # 2.902 GHz - 2,753,564,074 instructions # 1.42 insn per cycle - 0.725490660 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 + 1,901,448,560 cycles # 2.824 GHz + 2,678,183,164 instructions # 1.41 insn per cycle + 0.730072764 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 169 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... 
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 5.820533e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.706896e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.005515e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.246644e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.993104e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.325490e+07 ) sec^-1 MeanMatrixElemValue = ( 2.571360e+02 +- 2.114020e+02 ) GeV^-2 -TOTAL : 0.482853 sec +TOTAL : 0.496812 sec INFO: No Floating Point Exceptions have been reported - 2,066,417,058 cycles # 2.916 GHz - 2,981,752,243 instructions # 1.44 insn per cycle - 0.766268306 seconds time elapsed + 2,083,867,357 cycles # 2.832 GHz + 2,934,470,565 instructions # 1.41 insn per cycle + 0.792648211 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.424226e-01 Avg ME (F77/GPU) = 0.14247487904286338 Relative difference = 0.0003670698531228044 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.131393e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.156981e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.156981e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.078136e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.103342e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.103342e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 1.465329 sec +TOTAL : 1.538413 sec INFO: No Floating Point Exceptions have been reported - 4,403,508,539 cycles # 2.998 GHz - 12,927,457,946 instructions # 2.94 insn per cycle - 1.469265049 seconds time elapsed + 4,411,092,348 cycles # 2.861 GHz + 12,926,836,759 instructions # 2.93 insn per cycle + 1.542610115 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 630) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246861273719524 Relative difference = 8.940352641194861e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.923741e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.107237e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.107237e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.816554e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.994652e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.994652e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018564e+01 +- 1.429903e+01 ) GeV^-2 -TOTAL : 0.577117 sec +TOTAL : 0.599654 sec INFO: No Floating Point Exceptions have been reported - 1,726,421,109 cycles # 2.975 GHz - 4,536,755,110 instructions # 2.63 insn per cycle - 0.581051834 seconds time elapsed + 1,728,903,265 cycles # 2.870 GHz + 4,536,279,042 instructions # 2.62 insn per cycle + 0.603646034 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3611) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424686e-01 Avg ME (F77/C++) = 0.14246862329122401 Relative difference = 1.6348320966878032e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.757866e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.468995e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.468995e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.299874e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.938357e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.938357e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.301649 sec +TOTAL : 0.327791 sec INFO: No Floating Point Exceptions have been reported - 856,537,413 cycles # 2.808 GHz - 1,914,811,873 instructions # 2.24 insn per cycle - 0.305509856 seconds time elapsed + 861,849,665 cycles # 2.602 GHz + 1,914,633,101 instructions # 2.22 insn per cycle + 0.331876637 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3550) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
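Note on the "Internal loops fptype_sv" lines in the float hunks: they track the SIMD register width — scalar for 'none', VECTOR[4] on 128-bit SSE4.2, VECTOR[8] on 256-bit AVX2 (and on '512y', which is AVX512 restricted to 256-bit ymm registers), VECTOR[16] on 512-bit '512z'. A compile-time sketch of that width selection, assuming GCC/clang vector extensions; the macro __PREFER_256__ and the exact names are illustrative, not the plugin's real configuration:

  using fptype = float; // single precision, as in these '_f_' builds

  #if defined __AVX512F__ && !defined __PREFER_256__
  constexpr int neppV = 64 / sizeof( fptype ); // '512z': 512-bit zmm -> VECTOR[16]
  #elif defined __AVX2__
  constexpr int neppV = 32 / sizeof( fptype ); // 'avx2'/'512y': 256-bit -> VECTOR[8]
  #elif defined __SSE4_2__
  constexpr int neppV = 16 / sizeof( fptype ); // 'sse4': 128-bit xmm -> VECTOR[4]
  #else
  constexpr int neppV = 1;                     // 'none': scalar, no SIMD
  #endif

  // One SIMD "event page" of matrix-element values
  typedef fptype fptype_v __attribute__( ( vector_size( neppV * sizeof( fptype ) ) ) );

For the mixed ('_m_') builds further down the widths halve (VECTOR[2]/[4]/[8]), consistent with 64-bit lanes.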
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247491543012991 Relative difference = 1.0830068962165901e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.174332e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.002332e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.002332e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.927651e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.715679e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.715679e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018828e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.282627 sec +TOTAL : 0.294357 sec INFO: No Floating Point Exceptions have been reported - 801,753,582 cycles # 2.803 GHz - 1,830,571,668 instructions # 2.28 insn per cycle - 0.286587480 seconds time elapsed + 802,533,600 cycles # 2.696 GHz + 1,830,391,280 instructions # 2.28 insn per cycle + 0.298329557 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3366) (512y: 22) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247491543012991 Relative difference = 1.0830068962165901e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.655287e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.123588e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.123588e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.433633e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.866083e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.866083e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018829e+01 +- 1.429922e+01 ) GeV^-2 -TOTAL : 0.371375 sec +TOTAL : 0.389558 sec INFO: No Floating Point Exceptions have been reported - 728,666,338 cycles # 1.944 GHz - 1,306,341,029 instructions # 1.79 insn per cycle - 0.375409232 seconds time elapsed + 729,078,705 cycles # 1.856 GHz + 1,305,984,013 instructions # 1.79 insn per cycle + 0.393475655 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1926) (512y: 26) (512z: 2437) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
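Note on the 512z hunk above: the effective clock drops to ~1.86 GHz versus ~2.6-3.0 GHz for the narrower builds, consistent with AVX-512 frequency throttling on this Xeon Silver; this is why VECTOR[16] does not double the avx2 throughput. The perf counters printed in each hunk are enough to reconstruct both the IPC and the effective frequency, e.g. for the 512z float build:

  #include <cstdio>

  int main()
  {
    // Counters copied from the 512z_f_inl0_hrd1 hunk above
    const double cycles = 729078705.0;
    const double instructions = 1305984013.0;
    const double seconds = 0.393475655;
    std::printf( "IPC  = %.2f insn per cycle\n", instructions / cycles ); // ~1.79
    std::printf( "Freq = %.3f GHz\n", cycles / seconds / 1e9 );           // ~1.85 GHz
    return 0;
  }

(perf normalises the GHz figure to task-clock rather than elapsed time, so the reconstruction is close to but not exactly the printed 1.856 GHz.)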
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247491576758442 Relative difference = 1.1066920862943416e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt index e2c10d4bca..75ffaff930 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-01_23:22:44 +DATE: 2024-09-15_11:19:53 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.042271e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.019347e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.126926e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.751782e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.854275e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.972003e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.455575 sec +TOTAL : 0.462457 sec INFO: No Floating Point Exceptions have been reported - 1,949,957,691 cycles # 2.915 GHz - 2,774,209,464 instructions # 1.42 insn per cycle - 0.728034530 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 + 1,917,992,386 cycles # 2.823 GHz + 
2,716,857,811 instructions # 1.42 insn per cycle + 0.737791701 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.182186e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.548184e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.751701e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.933049e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.480174e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.706062e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.531018 sec +TOTAL : 0.542910 sec INFO: No Floating Point Exceptions have been reported - 2,238,994,244 cycles # 2.915 GHz - 3,265,794,068 instructions # 1.46 insn per cycle - 0.824796035 seconds time elapsed + 2,260,071,894 cycles # 2.877 GHz + 3,201,078,521 instructions # 1.42 insn per cycle + 0.842582370 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
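Note on the "-p" arguments of the check executables: they are <blocks> <threads> <iterations>, with one event per GPU thread per iteration. So "-p 2048 256 1" above processes 524288 events in a single large launch, while "-p 64 256 10" reuses a 16384-thread grid for ten iterations. In outline (hypothetical variable names; the real parsing lives in the generated check code):

  #include <cstdio>

  int main()
  {
    const int gpuBlocks = 2048, gpuThreads = 256, nIter = 1; // "-p 2048 256 1"
    const long long eventsPerIter = 1LL * gpuBlocks * gpuThreads; // one event/thread
    std::printf( "events per iteration: %lld\n", eventsPerIter );          // 524288
    std::printf( "events total:         %lld\n", eventsPerIter * nIter );  // 524288
    return 0;
  }

The larger grid is what lifts EvtsPerSec[MatrixElems] from ~2.9e7 to ~5.5e7 between the two runExe invocations: the V100 is undersubscribed at 64 blocks.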
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.424749e-01 Avg ME (F77/GPU) = 0.14247482577104625 Relative difference = 5.209967070245855e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.070148e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.092369e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.092369e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.028418e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.050644e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.050644e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.549834 sec +TOTAL : 1.613813 sec INFO: No Floating Point Exceptions have been reported - 4,636,067,594 cycles # 2.985 GHz - 13,177,808,559 instructions # 2.84 insn per cycle - 1.553904132 seconds time elapsed + 4,647,383,877 cycles # 2.879 GHz + 13,178,063,049 instructions # 2.84 insn per cycle + 1.618051260 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 681) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
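Note on "FP precision = MIXED": this tags the FPTYPE='m' builds, where part of the matrix-element arithmetic runs in single precision while the rest, including the reported averages, stays in double. A generic sketch of the accumulate-in-double pattern behind that idea (not the plugin's actual code):

  #include <cstdio>
  #include <vector>

  double averageME( const std::vector<float>& mes )
  {
    double sum = 0.; // accumulate in double so float round-off does not pile up
    for( float me : mes ) sum += me;
    return sum / static_cast<double>( mes.size() );
  }

  int main()
  {
    std::printf( "%.6e\n", averageME( { 1.424749e-01f, 1.424748e-01f } ) );
    return 0;
  }

This is also why the mixed hunks report relative differences of ~5e-7 against Fortran, between the ~1e-4 of pure float and the ~1e-10 typical of pure double.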
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.851566e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.920581e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.920581e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.864504e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.934483e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.934483e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.903963 sec +TOTAL : 0.898369 sec INFO: No Floating Point Exceptions have been reported - 2,648,666,014 cycles # 2.920 GHz - 7,474,201,263 instructions # 2.82 insn per cycle - 0.908149777 seconds time elapsed + 2,648,200,185 cycles # 2.937 GHz + 7,475,755,342 instructions # 2.82 insn per cycle + 0.902206814 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3153) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
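Note on the three EvtsPerSec figures: each divides the same event count by the time of a different phase — (23) "Rmb+ME" includes random numbers and rambo plus the matrix elements, (3) "MatrixElems" the ME stage, (3a) "MECalcOnly" the ME kernel alone. With "-p 64 256 10" the count is 64*256*10 = 163840 events, so the figures can be inverted back into phase timings:

  #include <cstdio>

  int main()
  {
    const double nEvents = 64.0 * 256.0 * 10.0;        // 163840
    const double tMatrixElems = nEvents / 1.934483e5;  // invert the sse4_m figure
    std::printf( "ME phase ~ %.3f s for %.0f events\n", tMatrixElems, nEvents ); // ~0.847 s
    return 0;
  }

On the CPU backends (3) and (3a) coincide because there is no separate device kernel to time; on CUDA they differ by the host-side overhead around the kernel.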
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.980538e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.167049e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.167049e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.200611e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.408501e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.408501e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.569921 sec +TOTAL : 0.532171 sec INFO: No Floating Point Exceptions have been reported - 1,470,951,187 cycles # 2.564 GHz - 3,127,090,607 instructions # 2.13 insn per cycle - 0.574168476 seconds time elapsed + 1,476,374,652 cycles # 2.757 GHz + 3,128,702,616 instructions # 2.12 insn per cycle + 0.536024340 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3131) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.682566e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.954257e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.954257e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.587903e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.854303e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.854303e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.463686 sec +TOTAL : 0.476990 sec INFO: No Floating Point Exceptions have been reported - 1,321,491,982 cycles # 2.828 GHz - 2,981,266,637 instructions # 2.26 insn per cycle - 0.467810621 seconds time elapsed + 1,322,669,287 cycles # 2.754 GHz + 2,982,885,294 instructions # 2.26 insn per cycle + 0.480825528 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2893) (512y: 110) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.347586e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.455232e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.455232e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.251912e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.353383e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.353383e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.719457 sec +TOTAL : 0.750102 sec INFO: No Floating Point Exceptions have been reported - 1,362,323,271 cycles # 1.885 GHz - 1,990,651,022 instructions # 1.46 insn per cycle - 0.723612725 seconds time elapsed + 1,363,693,421 cycles # 1.811 GHz + 1,991,339,845 instructions # 1.46 insn per cycle + 0.753947194 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1679) (512y: 108) (512z: 2252) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
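Note on the FPE banner repeated in every hunk: "will cause SIGFPE program aborts" means the binaries enable floating point traps, so a stray division by zero, invalid operation or overflow kills the run instead of propagating inf/nan into the cross-checks; "No Floating Point Exceptions have been reported" confirms a clean pass. A minimal sketch of enabling those traps with glibc:

  #ifndef _GNU_SOURCE
  #define _GNU_SOURCE // feenableexcept is a GNU extension
  #endif
  #include <fenv.h>

  int main()
  {
    // Trap exactly the three exceptions listed in the log banner
    feenableexcept( FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW );
    // ... run the matrix element calculation; any trapped FPE now raises SIGFPE ...
    return 0;
  }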
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 diff --git a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt index 731b43e370..40582e53fc 100644 --- a/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_gqttq_mad/log_gqttq_mad_m_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux' -DATE: 2024-09-01_23:22:58 +DATE: 2024-09-15_11:20:07 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.998475e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.914130e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.011656e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.750483e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.807638e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.927006e+07 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.450635 sec +TOTAL : 0.460124 sec INFO: No Floating Point Exceptions have been reported - 1,944,746,281 cycles # 2.916 GHz - 2,764,543,101 instructions # 1.42 insn per cycle - 0.724024546 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 + 1,938,834,678 cycles # 2.862 GHz + 
2,712,058,421 instructions # 1.40 insn per cycle + 0.735469517 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 3.154229e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.471183e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.669362e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.925117e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.366077e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.584033e+07 ) sec^-1 MeanMatrixElemValue = ( 2.602505e+02 +- 2.116328e+02 ) GeV^-2 -TOTAL : 0.532845 sec +TOTAL : 0.544645 sec INFO: No Floating Point Exceptions have been reported - 2,225,872,015 cycles # 2.895 GHz - 3,236,460,250 instructions # 1.45 insn per cycle - 0.825344230 seconds time elapsed + 2,249,855,314 cycles # 2.863 GHz + 3,222,814,057 instructions # 1.43 insn per cycle + 0.844200129 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
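Note on the runNcu step above: it reports 255 registers per thread for sigmaKin and fully uniform branching. On the Tesla V100 used here (65536 registers per SM), that register pressure alone limits residency to roughly one 256-thread block per SM, a plausible reason why the larger "-p 2048 256 1" grid gains less throughput than the extra parallelism would suggest. Back-of-the-envelope:

  #include <cstdio>

  int main()
  {
    const int regsPerSM = 65536, regsPerThread = 255, blockSize = 256;
    const int maxThreads = regsPerSM / regsPerThread; // ~257 threads, register-bound
    const int blocksPerSM = maxThreads / blockSize;   // 1 resident 256-thread block
    std::printf( "max resident blocks/SM (register-bound): %d\n", blocksPerSM );
    return 0;
  }

(The real limit also involves per-warp allocation granularity and shared memory, so treat this as an estimate, not an occupancy calculation.)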
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.424749e-01 Avg ME (F77/GPU) = 0.14247482577104625 Relative difference = 5.209967070245855e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.075863e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.098691e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.098691e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.054095e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.076295e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.076295e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 1.541791 sec +TOTAL : 1.574055 sec INFO: No Floating Point Exceptions have been reported - 4,642,155,557 cycles # 3.005 GHz - 13,166,673,520 instructions # 2.84 insn per cycle - 1.545817949 seconds time elapsed + 4,641,890,435 cycles # 2.943 GHz + 13,165,898,661 instructions # 2.84 insn per cycle + 1.578249512 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 666) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.900859e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.971794e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.971794e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.867370e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.936884e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.936884e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.880251 sec +TOTAL : 0.896912 sec INFO: No Floating Point Exceptions have been reported - 2,634,622,015 cycles # 2.982 GHz - 7,476,028,207 instructions # 2.84 insn per cycle - 0.884332319 seconds time elapsed + 2,636,737,245 cycles # 2.930 GHz + 7,477,755,477 instructions # 2.84 insn per cycle + 0.900719288 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3142) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482734618697 Relative difference = 5.099411406595165e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.279907e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.491191e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.491191e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.202775e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.410191e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.410191e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.518217 sec +TOTAL : 0.531412 sec INFO: No Floating Point Exceptions have been reported - 1,466,678,966 cycles # 2.811 GHz - 3,127,758,508 instructions # 2.13 insn per cycle - 0.522369566 seconds time elapsed + 1,468,072,782 cycles # 2.747 GHz + 3,129,202,339 instructions # 2.13 insn per cycle + 0.535248151 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3109) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.666233e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.933754e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.933754e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.576512e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.841608e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.841608e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.465925 sec +TOTAL : 0.477966 sec INFO: No Floating Point Exceptions have been reported - 1,319,548,349 cycles # 2.812 GHz - 2,981,592,473 instructions # 2.26 insn per cycle - 0.470005601 seconds time elapsed + 1,324,577,804 cycles # 2.753 GHz + 2,983,698,636 instructions # 2.25 insn per cycle + 0.481692847 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2869) (512y: 110) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe -p 64 256 10 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SM_GUX_TTXUX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.352608e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.463501e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.463501e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.229034e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.331255e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.331255e+05 ) sec^-1 MeanMatrixElemValue = ( 2.018083e+01 +- 1.429474e+01 ) GeV^-2 -TOTAL : 0.717564 sec +TOTAL : 0.757523 sec INFO: No Floating Point Exceptions have been reported - 1,362,472,140 cycles # 1.890 GHz - 1,989,923,600 instructions # 1.46 insn per cycle - 0.721628842 seconds time elapsed + 1,366,953,688 cycles # 1.797 GHz + 1,991,556,146 instructions # 1.46 insn per cycle + 0.761326972 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1655) (512y: 108) (512z: 2252) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 5 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 5 channels { 1 : 128, 2 : 96, 3 : 96, 4 : 96, 5 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/gq_ttq.mad/SubProcesses/P1_gux_ttxux/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.424749e-01 Avg ME (F77/C++) = 0.14247482643254802 Relative difference = 5.163537715318965e-07 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt index 85c78141b0..a10430f205 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-09-02_00:03:25 +DATE: 2024-09-15_12:19:52 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.564691e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.007021e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.548726e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.222962e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.849418e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.427313e+07 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.522287 sec +TOTAL : 0.531972 sec INFO: No Floating Point Exceptions have been reported - 2,186,415,358 cycles # 2.898 GHz - 3,130,135,893 instructions # 1.43 insn per cycle - 0.811473600 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 + 2,207,295,929 cycles # 
2.875 GHz + 3,148,652,719 instructions # 1.43 insn per cycle + 0.824191400 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 228 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 Avg ME (F77/GPU) = 4.3134710926110280 Relative difference = 2.1036162329561614e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.649154e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.686776e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.686776e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.605446e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.642912e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.642912e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 6.466778 
sec +TOTAL : 6.682964 sec INFO: No Floating Point Exceptions have been reported - 19,279,079,475 cycles # 2.980 GHz - 51,925,969,991 instructions # 2.69 insn per cycle - 6.471928509 seconds time elapsed + 19,598,347,374 cycles # 2.930 GHz + 52,065,080,941 instructions # 2.66 insn per cycle + 6.694844262 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 668) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.980039e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.114590e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.114590e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.916629e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.051442e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.051442e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.625823 sec +TOTAL : 3.747547 sec INFO: No Floating Point Exceptions have been reported - 10,900,387,506 cycles # 3.003 GHz - 30,787,661,791 instructions # 2.82 insn per cycle - 3.631002102 seconds time elapsed + 11,065,354,139 cycles # 2.943 GHz + 30,912,254,749 instructions # 2.79 insn per cycle + 3.760361851 seconds time elapsed =Symbols in 
CPPProcess_cpp.o= (~sse4: 2914) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.809628e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.159488e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.159488e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.668387e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.008748e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.008748e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.287459 sec +TOTAL : 2.399916 sec INFO: No Floating Point Exceptions have been reported - 6,475,661,061 cycles # 2.826 GHz - 13,666,695,330 instructions # 2.11 insn per cycle - 2.292637909 seconds time elapsed + 6,627,221,489 cycles # 2.749 GHz + 13,792,796,598 instructions # 2.08 insn per cycle + 2.412653295 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2941) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.229667e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.640727e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.640727e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.129922e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.540308e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.540308e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.112314 sec +TOTAL : 2.198225 sec INFO: No Floating Point Exceptions have been reported - 5,956,175,949 cycles # 2.814 GHz - 13,009,022,456 instructions # 2.18 insn per cycle - 2.117497927 seconds time elapsed + 6,104,431,058 cycles # 2.762 GHz + 13,134,794,290 instructions # 2.15 insn per cycle + 2.210920696 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2667) (512y: 146) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.552529e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.739990e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.739990e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.449384e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.629220e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.629220e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.059875 sec +TOTAL : 3.193674 sec INFO: No Floating Point Exceptions have been reported - 5,839,726,481 cycles # 1.906 GHz - 8,588,748,930 instructions # 1.47 insn per cycle - 3.065145740 seconds time elapsed + 5,993,463,965 cycles # 1.870 GHz + 8,712,960,993 instructions # 1.45 insn per cycle + 3.206184057 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1506) (512y: 128) (512z: 1943) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt index 0af02463f6..01ceafd1da 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_d_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-09-02_00:03:51 +DATE: 2024-09-15_12:20:19 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.554397e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.959544e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.498828e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.181438e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.797209e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.367517e+07 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.520674 sec +TOTAL : 0.531302 sec INFO: No Floating Point Exceptions have been reported - 2,207,639,246 cycles # 2.933 GHz - 3,209,912,765 instructions # 1.45 insn per cycle - 0.809508240 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 + 2,216,417,295 cycles # 
2.883 GHz + 3,137,968,070 instructions # 1.42 insn per cycle + 0.825226040 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 216 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 Avg ME (F77/GPU) = 4.3134710926110280 Relative difference = 2.1036162329561614e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.736140e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.778559e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.778559e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.706608e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.748828e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.748828e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 6.148636 
sec +TOTAL : 6.299466 sec INFO: No Floating Point Exceptions have been reported - 18,371,910,047 cycles # 2.986 GHz - 50,052,942,467 instructions # 2.72 insn per cycle - 6.153834062 seconds time elapsed + 18,540,883,021 cycles # 2.938 GHz + 50,178,474,604 instructions # 2.71 insn per cycle + 6.311951743 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 626) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.139479e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.287984e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.287984e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.062664e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.211274e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.211274e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.448725 sec +TOTAL : 3.576786 sec INFO: No Floating Point Exceptions have been reported - 10,387,878,283 cycles # 3.008 GHz - 29,165,847,979 instructions # 2.81 insn per cycle - 3.454003397 seconds time elapsed + 10,549,321,378 cycles # 2.940 GHz + 29,289,408,214 instructions # 2.78 insn per cycle + 3.589213709 seconds time elapsed =Symbols in 
CPPProcess_cpp.o= (~sse4: 2732) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926105795 Relative difference = 2.1036172727915933e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.453814e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.754112e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.754112e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.340015e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.632096e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.632096e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.462610 sec +TOTAL : 2.568858 sec INFO: No Floating Point Exceptions have been reported - 6,945,615,681 cycles # 2.815 GHz - 15,149,897,484 instructions # 2.18 insn per cycle - 2.468064425 seconds time elapsed + 7,118,801,409 cycles # 2.759 GHz + 15,276,261,936 instructions # 2.15 insn per cycle + 2.581007821 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3021) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.615593e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.939602e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.939602e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.507726e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.822175e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.822175e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 2.378771 sec +TOTAL : 2.479110 sec INFO: No Floating Point Exceptions have been reported - 6,687,743,595 cycles # 2.806 GHz - 14,621,758,561 instructions # 2.19 insn per cycle - 2.383962834 seconds time elapsed + 6,890,334,799 cycles # 2.767 GHz + 14,747,969,860 instructions # 2.14 insn per cycle + 2.491499387 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2617) (512y: 302) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.435627e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.606278e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.606278e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.315391e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.482899e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.482899e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.160533 sec +TOTAL : 3.316209 sec INFO: No Floating Point Exceptions have been reported - 6,041,475,634 cycles # 1.909 GHz - 10,338,847,329 instructions # 1.71 insn per cycle - 3.165865752 seconds time elapsed + 6,207,380,257 cycles # 1.865 GHz + 10,464,609,822 instructions # 1.69 insn per cycle + 3.328585456 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1268) (512y: 214) (512z: 2130) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313472e+00 Avg ME (F77/C++) = 4.3134710926107935 Relative difference = 2.103616776553298e-07 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt index 5fcaa607c5..2ef1c54aa0 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-09-02_00:04:17 +DATE: 2024-09-15_12:20:46 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.404305e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.566258e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.650776e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.552559e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.511007e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.603394e+08 ) sec^-1 MeanMatrixElemValue = ( 7.154219e+00 +- 1.620281e-01 ) GeV^0 -TOTAL : 0.484135 sec +TOTAL : 0.491259 sec INFO: No Floating Point Exceptions have been reported - 2,071,305,552 cycles # 2.928 GHz - 2,975,599,567 instructions # 1.44 insn per cycle - 0.765951641 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 + 2,068,896,846 cycles # 
2.881 GHz + 2,979,901,367 instructions # 1.44 insn per cycle + 0.776426531 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 131 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.313490e+00 Avg ME (F77/GPU) = 4.3136695491848513 Relative difference = 4.162503792787837e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.706014e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.748099e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.748099e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.683914e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.725672e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.725672e+05 ) sec^-1 MeanMatrixElemValue = ( 7.175644e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 6.232162 sec 
+TOTAL : 6.333148 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 18,618,405,428 cycles # 2.986 GHz - 51,216,623,324 instructions # 2.75 insn per cycle - 6.237206550 seconds time elapsed + 18,682,476,989 cycles # 2.947 GHz + 51,267,470,348 instructions # 2.74 insn per cycle + 6.341547157 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 625) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -104,8 +104,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313574e+00 @@ -113,24 +113,24 @@ Avg ME (F77/C++) = 4.3135738277342170 Relative difference = 3.9935743068669333e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.053495e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.316601e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.316601e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.015012e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.280109e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.280109e+05 ) sec^-1 MeanMatrixElemValue = ( 7.175642e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 2.671825 sec +TOTAL : 2.718050 sec INFO: The following Floating Point Exceptions have been reported: 
FE_UNDERFLOW - 7,933,798,435 cycles # 2.965 GHz - 19,318,604,169 instructions # 2.43 insn per cycle - 2.676827002 seconds time elapsed + 8,007,664,837 cycles # 2.940 GHz + 19,370,996,217 instructions # 2.42 insn per cycle + 2.726376718 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3542) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -138,8 +138,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313572e+00 @@ -147,24 +147,24 @@ Avg ME (F77/C++) = 4.3135722697479650 Relative difference = 6.253470796314402e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.022508e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.040916e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.040916e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.789023e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.799247e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.799247e+05 ) sec^-1 MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.391682 sec +TOTAL : 1.452394 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,945,769,791 cycles # 2.827 GHz - 8,833,631,799 instructions # 2.24 insn per 
cycle - 1.396789340 seconds time elapsed + 4,027,415,196 cycles # 2.759 GHz + 8,886,566,152 instructions # 2.21 insn per cycle + 1.460503609 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3715) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -172,31 +172,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313565e+00 Avg ME (F77/C++) = 4.3135645242873579 Relative difference = 1.1028294269894893e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.559238e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.732061e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.732061e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.322323e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.475598e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.475598e+05 ) sec^-1 MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.310268 sec +TOTAL : 1.366660 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,728,688,019 cycles # 2.837 GHz - 8,435,564,346 instructions # 2.26 insn per cycle - 1.315273778 seconds time elapsed + 3,807,036,059 cycles # 2.770 GHz + 8,489,981,547 instructions # 2.23 insn per cycle + 1.374788749 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3543) (512y: 20) (512z: 0) ------------------------------------------------------------------------- -runTest 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -204,31 +204,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 4.313565e+00 Avg ME (F77/C++) = 4.3135645242873579 Relative difference = 1.1028294269894893e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.151674e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.721217e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.721217e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.974329e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.534282e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.534282e+05 ) sec^-1 MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.790416 sec +TOTAL : 1.862736 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 3,506,346,207 cycles # 1.954 GHz - 6,244,372,214 instructions # 1.78 insn per cycle - 1.795521362 seconds time elapsed + 3,570,392,990 cycles # 1.910 GHz + 6,298,404,091 instructions # 1.76 insn per cycle + 1.870756064 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2323) (512y: 24) (512z: 2290) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -236,8 +236,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313564e+00 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt index f789a36ed6..479ebdb204 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_f_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-09-02_00:04:39 +DATE: 2024-09-15_12:21:08 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.689698e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.669285e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.767200e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.776065e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.594605e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.702542e+08 ) sec^-1 MeanMatrixElemValue = ( 7.154219e+00 +- 1.620281e-01 ) GeV^0 -TOTAL : 0.483733 sec +TOTAL : 0.491621 sec INFO: No Floating Point Exceptions have been reported - 2,063,650,160 cycles # 2.921 GHz - 2,954,472,232 instructions # 1.43 insn per cycle - 0.765340835 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 + 2,069,264,305 cycles # 
2.877 GHz + 2,928,235,838 instructions # 1.42 insn per cycle + 0.775773692 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 125 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.313490e+00 Avg ME (F77/GPU) = 4.3136695491848513 Relative difference = 4.162503792787837e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.766379e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.810796e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.810796e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.731608e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.775696e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.775696e+05 ) sec^-1 MeanMatrixElemValue = ( 7.175644e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 6.020230 sec 
+TOTAL : 6.161667 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 18,028,435,551 cycles # 2.993 GHz - 49,601,886,319 instructions # 2.75 insn per cycle - 6.025158583 seconds time elapsed + 18,113,353,892 cycles # 2.937 GHz + 49,656,566,510 instructions # 2.74 insn per cycle + 6.170127822 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 613) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -104,8 +104,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313574e+00 @@ -113,24 +113,24 @@ Avg ME (F77/C++) = 4.3135738277342170 Relative difference = 3.9935743068669333e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.615170e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.957825e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.957825e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.528214e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.868162e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.868162e+05 ) sec^-1 MeanMatrixElemValue = ( 7.175642e+00 +- 1.658767e-01 ) GeV^0 -TOTAL : 2.356923 sec +TOTAL : 2.421156 sec INFO: The following Floating Point Exceptions have been reported: 
FE_UNDERFLOW - 7,098,118,019 cycles # 3.006 GHz - 18,485,349,164 instructions # 2.60 insn per cycle - 2.362156827 seconds time elapsed + 7,159,836,216 cycles # 2.949 GHz + 18,538,672,579 instructions # 2.59 insn per cycle + 2.429136947 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3234) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -138,8 +138,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313572e+00 @@ -147,24 +147,24 @@ Avg ME (F77/C++) = 4.3135722697479650 Relative difference = 6.253470796314402e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.522988e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.985731e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.985731e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.353305e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.808520e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.808520e+05 ) sec^-1 MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.981973 sec +TOTAL : 2.063917 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 5,629,917,490 cycles # 2.834 GHz - 10,848,645,741 instructions # 1.93 insn per 
cycle - 1.986923799 seconds time elapsed + 5,718,285,058 cycles # 2.760 GHz + 10,903,070,951 instructions # 1.91 insn per cycle + 2.072527320 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4274) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -172,8 +172,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313565e+00 @@ -181,24 +181,24 @@ Avg ME (F77/C++) = 4.3135645242873579 Relative difference = 1.1028294269894893e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.583363e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.053605e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.053605e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.452070e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.924355e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.924355e+05 ) sec^-1 MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 1.963258 sec +TOTAL : 2.029021 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 5,552,996,034 cycles # 2.822 GHz - 10,546,245,088 instructions # 1.90 insn per cycle - 1.968423168 seconds time elapsed + 5,634,694,028 cycles # 2.767 GHz + 10,598,235,094 
instructions # 1.88 insn per cycle + 2.037144953 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 4135) (512y: 12) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -206,8 +206,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313565e+00 @@ -215,24 +215,24 @@ Avg ME (F77/C++) = 4.3135645242873579 Relative difference = 1.1028294269894893e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.527750e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.831624e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.831624e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.351507e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.637189e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.637189e+05 ) sec^-1 MeanMatrixElemValue = ( 7.198861e+00 +- 1.710281e-01 ) GeV^0 -TOTAL : 2.402533 sec +TOTAL : 2.516038 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 4,632,613,207 cycles # 1.926 GHz - 8,660,145,416 instructions # 1.87 insn per cycle - 2.407440167 seconds time elapsed + 4,700,188,022 cycles # 1.863 GHz + 8,712,811,590 instructions # 1.85 insn per cycle + 2.524039667 seconds time elapsed =Symbols in 
CPPProcess_cpp.o= (~sse4: 0) (avx2: 2799) (512y: 0) (512z: 2885) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -240,8 +240,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313564e+00 diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt index f9d2dff641..7f1052231e 100644 --- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx' -DATE: 2024-09-02_00:05:01 +DATE: 2024-09-15_12:21:31 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.542911e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.940059e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.486530e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.233444e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.828530e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.380985e+07 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 0.526869 sec +TOTAL : 0.531166 sec INFO: No Floating Point Exceptions have been reported - 2,202,869,848 cycles # 2.914 GHz - 3,187,431,708 instructions # 1.45 insn per cycle - 0.815017923 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 + 2,204,089,924 cycles # 2.871 GHz + 3,157,003,197 instructions # 1.43 insn per cycle + 0.824717149 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 228 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 4.313472e+00 Avg ME (F77/GPU) = 4.3134711012809239 Relative difference = 2.0835166567625394e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.556009e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.589038e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.589038e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.513438e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.546541e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.546541e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 6.847582 sec +TOTAL : 7.083035 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 20,531,344,425 cycles # 2.996 GHz - 51,924,253,983 instructions # 2.53 insn per cycle - 6.852906835 seconds time elapsed + 20,836,157,287 cycles # 2.937 GHz + 52,059,859,689 instructions # 2.50 insn per cycle + 7.095325403 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 655) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ 
PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 } @@ -104,8 +104,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 } INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW Avg ME (C++/C++) = 4.313472e+00 @@ -113,24 +113,24 @@ Avg ME (F77/C++) = 4.3134711778082178 Relative difference = 1.906102050071626e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.816147e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.936755e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.936755e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.708543e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.825114e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.825114e+05 ) sec^-1 MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0 -TOTAL : 3.830973 sec +TOTAL : 4.025851 sec INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW - 11,500,473,942 cycles # 2.998 GHz - 30,593,646,107 instructions # 2.66 insn per cycle - 3.836311415 seconds time elapsed + 11,676,241,949 cycles # 2.892 GHz + 30,719,909,890 instructions # 2.63 insn per cycle + 4.038601753 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2971) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
@@ -138,8 +138,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
Avg ME (C++/C++) = 4.313472e+00
@@ -147,24 +147,24 @@ Avg ME (F77/C++) = 4.3134711778082178
Relative difference = 1.906102050071626e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 4.569633e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.880415e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.880415e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 4.506319e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.824086e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.824086e+05 ) sec^-1
MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0
-TOTAL : 2.402556 sec
+TOTAL : 2.479724 sec
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
- 6,695,614,069 cycles # 2.782 GHz
- 13,609,071,038 instructions # 2.03 insn per cycle
- 2.407778282 seconds time elapsed
+ 6,856,020,570 cycles # 2.752 GHz
+ 13,733,686,621 instructions # 2.00 insn per cycle
+ 2.492002268 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3118) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
@@ -172,8 +172,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
Avg ME (C++/C++) = 4.313472e+00
@@ -181,24 +181,24 @@ Avg ME (F77/C++) = 4.3134712319139954
Relative difference = 1.7806676491157786e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 5.065722e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 5.449896e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 5.449896e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 4.929662e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.305854e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.305854e+05 ) sec^-1
MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0
-TOTAL : 2.178378 sec
+TOTAL : 2.278940 sec
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
- 6,166,178,947 cycles # 2.825 GHz
- 12,973,644,724 instructions # 2.10 insn per cycle
- 2.183709011 seconds time elapsed
+ 6,324,440,516 cycles # 2.761 GHz
+ 13,099,663,654 instructions # 2.07 insn per cycle
+ 2.291244442 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2851) (512y: 150) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
@@ -206,8 +206,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
Avg ME (C++/C++) = 4.313472e+00
@@ -215,24 +215,24 @@ Avg ME (F77/C++) = 4.3134712319139954
Relative difference = 1.7806676491157786e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 3.224497e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.375730e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.375730e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 3.121063e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.267193e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.267193e+05 ) sec^-1
MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0
-TOTAL : 3.361722 sec
+TOTAL : 3.513123 sec
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
- 6,395,645,824 cycles # 1.900 GHz
- 8,700,671,655 instructions # 1.36 insn per cycle
- 3.366960874 seconds time elapsed
+ 6,549,229,378 cycles # 1.858 GHz
+ 8,826,958,587 instructions # 1.35 insn per cycle
+ 3.525479379 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1792) (512y: 130) (512z: 2013)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
@@ -240,8 +240,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
Avg ME (C++/C++) = 4.313472e+00
diff --git a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt
index 6c2855449d..b5ff528c40 100644
--- a/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt
+++ b/epochX/cudacpp/tput/logs_heftggbb_mad/log_heftggbb_mad_m_inl0_hrd1.txt
@@ -1,5 +1,5 @@
-Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx
+Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx
BACKEND=cpp512y (was cppauto)
OMPFLAGS=
FPTYPE='d'
@@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h
make: Nothing to be done for 'gtestlibs'.
make USEBUILDDIR=1 BACKEND=cuda
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
make USEBUILDDIR=1 BACKEND=cppnone
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
make USEBUILDDIR=1 BACKEND=cppsse4
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
make USEBUILDDIR=1 BACKEND=cppavx2
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
make USEBUILDDIR=1 BACKEND=cpp512z
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx'
-DATE: 2024-09-02_00:05:28
+DATE: 2024-09-15_12:21:59
On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_HEFT_GG_BBX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 4.541786e+07 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 8.916256e+07 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.462995e+07 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 4.263927e+07 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 8.696044e+07 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.276288e+07 ) sec^-1
MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0
-TOTAL : 0.525979 sec
+TOTAL : 0.532325 sec
INFO: No Floating Point Exceptions have been reported
- 2,208,390,386 cycles # 2.919 GHz
- 3,204,166,788 instructions # 1.45 insn per cycle
- 0.815663809 seconds time elapsed
-runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1
+ 2,205,625,214 cycles # 2.868 GHz
+ 3,174,502,414 instructions # 1.44 insn per cycle
+ 0.825987629 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1
==PROF== Profiling "sigmaKin": launch__registers_per_thread 216
==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/runTest_cuda.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/runTest_cuda.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
@@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2
Avg ME (C++/GPU) = 4.313472e+00
Avg ME (F77/GPU) = 4.3134711012809239
Relative difference = 2.0835166567625394e-07
OK (relative difference <= 5E-3)
=========================================================================
-Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe
+Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.hip_m_inl0_hrd1/check_hip.exe
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME] (23) = ( 1.637772e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.674252e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.674252e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.606903e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.644535e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.644535e+05 ) sec^-1
MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0
-TOTAL : 6.510565 sec
+TOTAL : 6.677680 sec
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
- 19,507,805,821 cycles # 2.995 GHz
- 49,956,387,381 instructions # 2.56 insn per cycle
- 6.515739319 seconds time elapsed
+ 19,666,494,079 cycles # 2.940 GHz
+ 50,081,060,677 instructions # 2.55 insn per cycle
+ 6.689882991 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 599) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
@@ -104,8 +104,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
Avg ME (C++/C++) = 4.313472e+00
@@ -113,24 +113,24 @@ Avg ME (F77/C++) = 4.3134711778082178
Relative difference = 1.906102050071626e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 2.876906e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.002678e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.002678e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.871340e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.003903e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.003903e+05 ) sec^-1
MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0
-TOTAL : 3.752629 sec
+TOTAL : 3.840726 sec
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
- 10,992,095,765 cycles # 2.926 GHz
- 29,099,784,091 instructions # 2.65 insn per cycle
- 3.757823325 seconds time elapsed
+ 11,259,304,877 cycles # 2.923 GHz
+ 29,230,934,183 instructions # 2.60 insn per cycle
+ 3.852980170 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 2807) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
@@ -138,8 +138,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
Avg ME (C++/C++) = 4.313472e+00
@@ -147,24 +147,24 @@ Avg ME (F77/C++) = 4.3134711778082178
Relative difference = 1.906102050071626e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 3.824550e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.042218e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.042218e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 3.726077e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.943109e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.943109e+05 ) sec^-1
MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0
-TOTAL : 2.849571 sec
+TOTAL : 2.970419 sec
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
- 8,040,596,401 cycles # 2.817 GHz
- 15,172,177,342 instructions # 1.89 insn per cycle
- 2.854978145 seconds time elapsed
+ 8,229,432,856 cycles # 2.759 GHz
+ 15,297,097,015 instructions # 1.86 insn per cycle
+ 2.983820409 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3202) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
@@ -172,8 +172,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
Avg ME (C++/C++) = 4.313472e+00
@@ -181,24 +181,24 @@ Avg ME (F77/C++) = 4.3134712319139954
Relative difference = 1.7806676491157786e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 3.946624e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.177226e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.177226e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 3.908170e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.147639e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.147639e+05 ) sec^-1
MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0
-TOTAL : 2.763530 sec
+TOTAL : 2.839910 sec
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
- 7,772,626,027 cycles # 2.808 GHz
- 14,479,384,179 instructions # 1.86 insn per cycle
- 2.768634488 seconds time elapsed
+ 7,866,112,540 cycles # 2.759 GHz
+ 14,608,431,526 instructions # 1.86 insn per cycle
+ 2.852893659 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2774) (512y: 304) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
@@ -206,8 +206,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
Avg ME (C++/C++) = 4.313472e+00
@@ -215,24 +215,24 @@ Avg ME (F77/C++) = 4.3134712319139954
Relative difference = 1.7806676491157786e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_HEFT_GG_BBX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1]
Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
FP precision = MIXED (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 3.163794e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.307782e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.307782e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 3.030076e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.169205e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.169205e+05 ) sec^-1
MeanMatrixElemValue = ( 7.148017e+00 +- 1.609110e-01 ) GeV^0
-TOTAL : 3.421119 sec
+TOTAL : 3.616163 sec
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
- 6,532,980,349 cycles # 1.907 GHz
- 9,893,746,394 instructions # 1.51 insn per cycle
- 3.426456301 seconds time elapsed
+ 6,699,969,895 cycles # 1.847 GHz
+ 10,018,865,936 instructions # 1.50 insn per cycle
+ 3.629335211 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1565) (512y: 216) (512z: 2217)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 4 channels { no-multichannel : 512 }
@@ -240,8 +240,8 @@ INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
DEBUG: MEK (channelid array) processed 512 events across 4 channels { 1 : 128, 2 : 128, 3 : 128, 4 : 128 }
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/heft_gg_bb.mad/SubProcesses/P1_gg_bbx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
INFO: The following Floating Point Exceptions have been reported: FE_UNDERFLOW
Avg ME (C++/C++) = 4.313472e+00
diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt
index bea3e36597..7707f676a6 100644
--- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt
+++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd0.txt
@@ -1,5 +1,5 @@
-Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx
+Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx
BACKEND=cpp512y (was cppauto)
OMPFLAGS=
FPTYPE='d'
@@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h
make: Nothing to be done for 'gtestlibs'.
make USEBUILDDIR=1 BACKEND=cuda
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
make USEBUILDDIR=1 BACKEND=cppnone
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
make USEBUILDDIR=1 BACKEND=cppsse4
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
make USEBUILDDIR=1 BACKEND=cppavx2
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
make USEBUILDDIR=1 BACKEND=cpp512z
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx'
-DATE: 2024-09-02_00:02:20
+DATE: 2024-09-15_12:18:46
On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 2.775059e+04 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 2.792209e+04 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 2.795305e+04 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 2.767516e+04 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 2.784818e+04 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 2.787795e+04 ) sec^-1
MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4
-TOTAL : 0.465694 sec
+TOTAL : 0.470421 sec
INFO: No Floating Point Exceptions have been reported
- 2,004,061,261 cycles # 2.915 GHz
- 2,971,857,164 instructions # 1.48 insn per cycle
- 0.744562845 seconds time elapsed
-runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1
+ 1,978,272,924 cycles # 2.864 GHz
+ 2,912,164,766 instructions # 1.47 insn per cycle
+ 0.749211691 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 1 256 1
==PROF== Profiling "sigmaKin": launch__registers_per_thread 255
==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
.........................................................................
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 7.091435e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 7.188484e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 7.195415e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 7.005244e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 7.117313e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 7.126114e+05 ) sec^-1
MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4
-TOTAL : 0.480494 sec
+TOTAL : 0.483026 sec
INFO: No Floating Point Exceptions have been reported
- 2,068,696,668 cycles # 2.915 GHz
- 3,082,017,608 instructions # 1.49 insn per cycle
- 0.767061276 seconds time elapsed
+ 2,024,260,948 cycles # 2.878 GHz
+ 3,029,497,927 instructions # 1.50 insn per cycle
+ 0.762830166 seconds time elapsed
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
@@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2
Avg ME (C++/GPU) = 8.127459e-06
Avg ME (F77/GPU) = 8.1274562860176604E-006
Relative difference = 3.3392753366481633e-07
OK (relative difference <= 5E-3)
=========================================================================
-Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe
+Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd0/check_hip.exe
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME] (23) = ( 3.487038e+03 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.490402e+03 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.490402e+03 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 3.405701e+03 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.409074e+03 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.409074e+03 ) sec^-1
MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4
-TOTAL : 0.153531 sec
+TOTAL : 0.157165 sec
INFO: No Floating Point Exceptions have been reported
- 468,548,605 cycles # 2.990 GHz
- 1,389,841,452 instructions # 2.97 insn per cycle
- 0.157176483 seconds time elapsed
+ 467,074,127 cycles # 2.919 GHz
+ 1,389,682,298 instructions # 2.98 insn per cycle
+ 0.160520641 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 3908) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
@@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 8.127459e-06
Avg ME (F77/C++) = 8.1274562860167185E-006
Relative difference = 3.339276495559746e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 6.658870e+03 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 6.671057e+03 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 6.671057e+03 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 6.459230e+03 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 6.470849e+03 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 6.470849e+03 ) sec^-1
MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4
-TOTAL : 0.081219 sec
+TOTAL : 0.083614 sec
INFO: No Floating Point Exceptions have been reported
- 240,143,250 cycles # 2.847 GHz
- 693,085,573 instructions # 2.89 insn per cycle
- 0.084894638 seconds time elapsed
+ 239,038,405 cycles # 2.765 GHz
+ 692,921,675 instructions # 2.90 insn per cycle
+ 0.087016440 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 9483) (avx2: 0) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
@@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 8.127459e-06
Avg ME (F77/C++) = 8.1274562860167168E-006
Relative difference = 3.3392764976441195e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 1.451702e+04 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.457378e+04 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.457378e+04 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.419984e+04 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.425694e+04 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.425694e+04 ) sec^-1
MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4
-TOTAL : 0.038390 sec
+TOTAL : 0.039288 sec
INFO: No Floating Point Exceptions have been reported
- 114,533,313 cycles # 2.750 GHz
- 258,003,017 instructions # 2.25 insn per cycle
- 0.042202043 seconds time elapsed
+ 113,366,397 cycles # 2.696 GHz
+ 257,996,166 instructions # 2.28 insn per cycle
+ 0.042698199 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8503) (512y: 0) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 }
@@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported
DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 }
INFO: No Floating Point Exceptions have been reported
-------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2
Avg ME (C++/C++) = 8.127459e-06
Avg ME (F77/C++) = 8.1274562860174791E-006
Relative difference = 3.3392755596761116e-07
OK (relative difference <= 5E-3)
=========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP=
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0]
Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
FP precision = DOUBLE (NaN/abnormal=0, zero=0)
Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 1.668827e+04 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.676697e+04 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.676697e+04 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.624961e+04 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.632288e+04 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.632288e+04 ) sec^-1
MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4
-TOTAL : 0.033710 sec
+TOTAL : 0.034517 sec
INFO: No Floating Point Exceptions have been reported
- 102,401,077 cycles # 2.773 GHz
- 240,000,244 instructions # 2.34 insn per cycle
- 0.037486472 seconds time elapsed
+ 101,263,068 cycles # 2.711 GHz
+ 239,969,377 instructions # 2.37 insn per cycle
+ 0.037861089 seconds time elapsed
=Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8140) (512y: 150) (512z: 0)
-------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/runTest_cpp.exe
INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
[ PASSED ] 4 tests.
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.238178e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.244573e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.244573e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.199166e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.204857e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.204857e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.044867 sec +TOTAL : 0.046301 sec INFO: No Floating Point Exceptions have been reported - 89,835,188 cycles # 1.864 GHz - 134,305,795 instructions # 1.50 insn per cycle - 0.048794310 seconds time elapsed + 89,031,390 cycles # 1.806 GHz + 134,346,666 instructions # 1.51 insn per cycle + 0.049794003 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1943) (512y: 126) (512z: 7090) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
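[editor's note] Across these blocks the "Internal loops fptype_sv" line encodes the SIMD width of each backend: in double precision, 'sse4' packs 2 doubles into 128 bits, 'avx2' and '512y' pack 4 into 256 bits, and '512z' packs 8 into 512 bits (the FLOAT logs further below double each count). A hedged sketch of that relation, lanes = register bits / (8 * sizeof(fptype)); the backend table is transcribed from the log tags and is not the project's real configuration code:

#include <cstdio>

// Hedged sketch of the width relation behind "VECTOR[2] ('sse4': SSE4.2,
// 128bit)" etc.: lanes = register width in bits / (8 * sizeof(fptype)).
template<typename FpType>
int simdLanes( int registerBits )
{
  return registerBits / ( 8 * static_cast<int>( sizeof( FpType ) ) );
}

int main()
{
  struct { const char* tag; int bits; } backends[] = {
    { "sse4", 128 }, { "avx2", 256 }, { "512y", 256 }, { "512z", 512 } };
  for( const auto& b : backends )
    std::printf( "%s: VECTOR[%d] doubles, VECTOR[%d] floats (%dbit)\n",
                 b.tag, simdLanes<double>( b.bits ), simdLanes<float>( b.bits ), b.bits );
  return 0;
}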
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt index a1c588c090..ca3a407fd8 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_d_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-09-02_00:02:31 +DATE: 2024-09-15_12:18:57 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.822628e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.840741e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.844507e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.802842e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.821481e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.824642e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.465549 sec +TOTAL : 0.470683 sec INFO: No Floating Point Exceptions have been reported - 2,003,972,246 cycles # 2.916 GHz - 2,932,804,269 instructions # 1.46 insn per cycle - 0.743970096 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1 + 1,990,204,132 cycles # 2.870 GHz + 2,908,985,105 instructions # 1.46 insn per cycle + 0.750697991 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.175681e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.275605e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.282748e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.083966e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.205394e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.213656e+05 ) sec^-1 MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.480908 sec +TOTAL : 0.487582 sec INFO: No Floating Point Exceptions have been reported - 2,062,625,343 cycles # 2.927 GHz - 3,103,034,956 instructions # 1.50 insn per cycle - 0.761786052 seconds time elapsed + 2,025,056,560 cycles # 2.852 GHz + 2,989,421,142 instructions # 1.48 insn per cycle + 0.769117174 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
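[editor's note] The EvtsPerSec figures are event counts divided by the time spent in each timed stage. Reading "-p 64 256 1" as (GPU blocks, threads per block, iterations), the large CUDA run above processes 64*256*1 = 16384 events per stage. A hedged sketch of the arithmetic; the stage time below is an assumed input for illustration, since the log prints only the derived rate:

#include <cstdio>

// Hedged sketch: nevents = blocks * threads * iterations, and
// EvtsPerSec = nevents / time spent in the timed section.
int main()
{
  const int blocks = 64, threadsPerBlock = 256, iterations = 1;
  const long long nevents = 1LL * blocks * threadsPerBlock * iterations; // 16384
  const double matrixElemSeconds = 0.02274; // assumed stage time, illustration only
  std::printf( "nevents = %lld\n", nevents );
  std::printf( "EvtsPerSec[MatrixElems] = ( %e ) sec^-1\n", nevents / matrixElemSeconds );
  return 0;
}

With that assumed stage time the sketch yields ~7.2e5 events per second, the order of magnitude reported above; note that "TOTAL" in the log includes setup and is longer than any single timed stage.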
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 Avg ME (F77/GPU) = 8.1274562860176604E-006 Relative difference = 3.3392753366481633e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.505070e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.508741e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.508741e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.394566e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.397741e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.397741e+03 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.152229 sec +TOTAL : 0.157033 sec INFO: No Floating Point Exceptions have been reported - 465,988,139 cycles # 3.002 GHz - 1,385,168,372 instructions # 2.97 insn per cycle - 0.155892961 seconds time elapsed + 465,720,728 cycles # 2.911 GHz + 1,385,003,144 instructions # 2.97 insn per cycle + 0.160593741 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3796) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167185E-006 Relative difference = 3.339276495559746e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.681485e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.694162e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.694162e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.474186e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.485931e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.485931e+03 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.080345 sec +TOTAL : 0.082867 sec INFO: No Floating Point Exceptions have been reported - 239,153,178 cycles # 2.865 GHz - 689,108,288 instructions # 2.88 insn per cycle - 0.084004313 seconds time elapsed + 237,575,401 cycles # 2.770 GHz + 689,116,420 instructions # 2.90 insn per cycle + 0.086305788 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 9528) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860167168E-006 Relative difference = 3.3392764976441195e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.478175e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.484010e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.484010e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.436754e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.442531e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.442531e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.037001 sec +TOTAL : 0.038039 sec INFO: No Floating Point Exceptions have been reported - 111,373,310 cycles # 2.769 GHz - 253,520,749 instructions # 2.28 insn per cycle - 0.040765186 seconds time elapsed + 110,520,646 cycles # 2.700 GHz + 253,448,082 instructions # 2.29 insn per cycle + 0.041474271 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8458) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
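[editor's note] The counter lines pair cycle and instruction counts with a derived "insn per cycle" (IPC) figure, which is simply instructions divided by cycles; the wider-vector builds execute fewer, heavier instructions and so tend to show lower IPC (compare the 512z blocks at ~1.5 with the sse4 blocks at ~2.9). A quick arithmetic check against the avx2_d_inl0_hrd1 counters above:

#include <cstdio>

// Hedged sketch: reproduces the "# insn per cycle" annotation as
// instructions / cycles, using the counters from the avx2_d_inl0_hrd1 run.
int main()
{
  const double cycles = 110520646.0;       // "110,520,646 cycles"
  const double instructions = 253448082.0; // "253,448,082 instructions"
  std::printf( "%.2f insn per cycle\n", instructions / cycles ); // prints 2.29
  return 0;
}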
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.631032e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.638379e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.638379e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.611056e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.618327e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.618327e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.033662 sec +TOTAL : 0.034052 sec INFO: No Floating Point Exceptions have been reported - 100,161,001 cycles # 2.721 GHz - 235,610,104 instructions # 2.35 insn per cycle - 0.037359987 seconds time elapsed + 98,863,837 cycles # 2.687 GHz + 235,605,174 instructions # 2.38 insn per cycle + 0.037353270 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8098) (512y: 150) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.233910e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.239184e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.239184e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.176506e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.181658e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.181658e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.044222 sec +TOTAL : 0.046442 sec INFO: No Floating Point Exceptions have been reported - 87,663,045 cycles # 1.846 GHz - 129,734,772 instructions # 1.48 insn per cycle - 0.048056644 seconds time elapsed + 86,647,290 cycles # 1.764 GHz + 129,720,267 instructions # 1.50 insn per cycle + 0.049837932 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1899) (512y: 126) (512z: 7094) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
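[editor's note] Each run also quotes "MeanMatrixElemValue = ( mean +- err ) GeV^-4". The log does not state whether the "+-" figure is the standard deviation of the per-event MEs or the error on their mean; the sketch below, with made-up ME values, computes both so either convention can be reproduced:

#include <cmath>
#include <cstdio>
#include <vector>

int main()
{
  // Illustrative per-event ME values (GeV^-4); not taken from this log.
  const std::vector<double> me = { 5.0e-4, 7.0e-4, 9.5e-4, 6.2e-4 };
  double sum = 0, sum2 = 0;
  for( double x : me ) { sum += x; sum2 += x * x; }
  const double n = (double)me.size();
  const double mean = sum / n;
  const double stdev = std::sqrt( sum2 / n - mean * mean ); // population std deviation
  std::printf( "MeanMatrixElemValue = ( %e +- %e ) GeV^-4 (stdev)\n", mean, stdev );
  std::printf( "MeanMatrixElemValue = ( %e +- %e ) GeV^-4 (error on mean)\n",
               mean, stdev / std::sqrt( n ) );
  return 0;
}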
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562860174791E-006 Relative difference = 3.3392755596761116e-07 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt index a2f41df6d1..0df257cc6a 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-09-02_00:02:42 +DATE: 2024-09-15_12:19:09 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.220036e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.228758e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.230706e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.204232e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.214249e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.216451e+04 ) sec^-1 MeanMatrixElemValue = ( 7.188141e-04 +- 6.565202e-04 ) GeV^-4 -TOTAL : 0.469142 sec +TOTAL : 0.474860 sec INFO: No Floating Point Exceptions have been reported - 1,998,351,728 cycles # 2.917 GHz - 2,946,568,597 instructions # 1.47 insn per cycle - 0.742067766 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 + 1,958,003,333 cycles # 2.836 GHz + 2,859,472,548 instructions # 1.46 insn per cycle + 0.747968607 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.043635e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.115608e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.122969e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.933159e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.016706e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.024424e+05 ) sec^-1 MeanMatrixElemValue = ( 8.020494e-03 +- 4.025605e-03 ) GeV^-4 -TOTAL : 0.469969 sec +TOTAL : 0.477251 sec INFO: No Floating Point Exceptions have been reported - 1,999,338,774 cycles # 2.916 GHz - 2,967,297,221 instructions # 1.48 insn per cycle - 0.742840858 seconds time elapsed + 1,992,584,596 cycles # 2.867 GHz + 2,884,692,368 instructions # 1.45 insn per cycle + 0.751767434 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
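[editor's note] In these FLOAT builds the F77-vs-C++ relative differences range from roughly 1e-9 to 5e-6, versus ~3.3e-7 for the DOUBLE builds; both remain far below the 5E-3 acceptance threshold. For context, a small sketch printing each precision's machine epsilon next to that tolerance:

#include <cstdio>
#include <limits>

// Hedged context for the FLOAT vs DOUBLE blocks: the 5E-3 tolerance is many
// orders of magnitude looser than either precision's machine epsilon, so
// both single- and double-precision MEs pass the same comparison.
int main()
{
  std::printf( "float  epsilon = %e\n", (double)std::numeric_limits<float>::epsilon() );
  std::printf( "double epsilon = %e\n", std::numeric_limits<double>::epsilon() );
  std::printf( "tolerance      = %e\n", 5e-3 );
  return 0;
}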
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 8.127250e-06 Avg ME (F77/GPU) = 8.1272869669930272E-006 Relative difference = 4.548524165778887e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.525411e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.528893e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.528893e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.431583e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.434922e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.434922e+03 ) sec^-1 MeanMatrixElemValue = ( 7.177153e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.151823 sec +TOTAL : 0.155945 sec INFO: No Floating Point Exceptions have been reported - 463,685,635 cycles # 2.993 GHz - 1,381,979,905 instructions # 2.98 insn per cycle - 0.155507501 seconds time elapsed + 462,855,819 cycles # 2.915 GHz + 1,381,844,785 instructions # 2.99 insn per cycle + 0.159290331 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3058) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127811e-06 Avg ME (F77/C++) = 8.1278105271212486E-006 Relative difference = 5.8180333155894157e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.230460e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.234563e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.234563e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.210882e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.215211e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.215211e+04 ) sec^-1 MeanMatrixElemValue = ( 7.177152e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.044834 sec +TOTAL : 0.045512 sec INFO: No Floating Point Exceptions have been reported - 132,674,412 cycles # 2.762 GHz - 372,118,908 instructions # 2.80 insn per cycle - 0.048600516 seconds time elapsed + 131,360,157 cycles # 2.718 GHz + 372,013,509 instructions # 2.83 insn per cycle + 0.048801319 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:10141) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127809e-06 Avg ME (F77/C++) = 8.1278090510674588E-006 Relative difference = 6.2830535070193674e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.857674e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.882410e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.882410e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.769306e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.791871e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.791871e+04 ) sec^-1 MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.020489 sec +TOTAL : 0.021120 sec INFO: No Floating Point Exceptions have been reported - 65,153,175 cycles # 2.750 GHz - 142,867,018 instructions # 2.19 insn per cycle - 0.024228257 seconds time elapsed + 64,157,831 cycles # 2.680 GHz + 142,829,765 instructions # 2.23 insn per cycle + 0.024479209 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 9251) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275366216540664E-006 Relative difference = 4.655111786058001e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.178975e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.210365e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.210365e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.078002e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.105354e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.105354e+04 ) sec^-1 MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.018701 sec +TOTAL : 0.019156 sec INFO: No Floating Point Exceptions have been reported - 59,922,348 cycles # 2.743 GHz - 132,780,375 instructions # 2.22 insn per cycle - 0.022373587 seconds time elapsed + 59,143,033 cycles # 2.685 GHz + 132,774,537 instructions # 2.24 insn per cycle + 0.022562262 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8963) (512y: 28) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275366216540664E-006 Relative difference = 4.655111786058001e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.404211e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.426424e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.426424e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.363857e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.386270e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.386270e+04 ) sec^-1 MeanMatrixElemValue = ( 7.165747e-04 +- 6.542824e-04 ) GeV^-4 -TOTAL : 0.024073 sec +TOTAL : 0.024531 sec INFO: No Floating Point Exceptions have been reported - 52,328,028 cycles # 1.905 GHz - 79,593,074 instructions # 1.52 insn per cycle - 0.028041224 seconds time elapsed + 51,349,038 cycles # 1.867 GHz + 79,557,658 instructions # 1.55 insn per cycle + 0.028087213 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2834) (512y: 32) (512z: 7442) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
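[editor's note] The recurring "MEK (channelid array) processed 512 events across 72 channels { 1 : 32, ..., 16 : 32 }" debug lines show 512 events assigned in contiguous blocks of 32 to consecutive channel ids, so only the first 512/32 = 16 of the 72 channels receive events. A hedged sketch of that blocked assignment and tally (not the repository's actual MEK code):

#include <cstdio>
#include <map>

int main()
{
  const int nevents = 512, blockSize = 32, nchannels = 72;
  std::map<int, int> tally; // channel id -> number of events
  for( int ievt = 0; ievt < nevents; ievt++ )
    tally[( ievt / blockSize ) % nchannels + 1]++; // blocks of 32, 1-based ids
  std::printf( "processed %d events across %d channels {", nevents, nchannels );
  bool first = true;
  for( const auto& kv : tally )
  {
    std::printf( "%s %d : %d", first ? "" : ",", kv.first, kv.second );
    first = false;
  }
  std::printf( " }\n" );
  return 0;
}

Running this reproduces the "{ 1 : 32, 2 : 32, ... 16 : 32 }" tally printed in the logs.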
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275369863475849E-006 Relative difference = 1.6797726498700304e-09 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt index f5309d6249..1f92901611 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_f_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-09-02_00:02:52 +DATE: 2024-09-15_12:19:19 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.247820e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.258266e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.260209e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.234747e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.244150e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.246143e+04 ) sec^-1 MeanMatrixElemValue = ( 7.188141e-04 +- 6.565202e-04 ) GeV^-4 -TOTAL : 0.472822 sec +TOTAL : 0.473072 sec INFO: No Floating Point Exceptions have been reported - 2,009,923,207 cycles # 2.920 GHz - 2,890,056,049 instructions # 1.44 insn per cycle - 0.745979682 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1 + 1,983,051,703 cycles # 2.868 GHz + 2,920,120,611 instructions # 1.47 insn per cycle + 0.748230768 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 8.160560e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 8.234872e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 8.242045e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.099236e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 8.191571e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 8.199199e+05 ) sec^-1 MeanMatrixElemValue = ( 8.020496e-03 +- 4.025606e-03 ) GeV^-4 -TOTAL : 0.469871 sec +TOTAL : 0.474597 sec INFO: No Floating Point Exceptions have been reported - 2,012,230,071 cycles # 2.932 GHz - 2,945,579,242 instructions # 1.46 insn per cycle - 0.743137766 seconds time elapsed + 1,988,967,454 cycles # 2.876 GHz + 2,944,964,203 instructions # 1.48 insn per cycle + 0.748107743 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 8.127250e-06 Avg ME (F77/GPU) = 8.1272866419447706E-006 Relative difference = 4.508529302013153e-06 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.539442e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.543006e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.543006e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.448809e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.452114e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.452114e+03 ) sec^-1 MeanMatrixElemValue = ( 7.177153e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.150565 sec +TOTAL : 0.154406 sec INFO: No Floating Point Exceptions have been reported - 461,456,563 cycles # 3.002 GHz - 1,376,868,974 instructions # 2.98 insn per cycle - 0.154279891 seconds time elapsed + 460,841,033 cycles # 2.931 GHz + 1,376,637,796 instructions # 2.99 insn per cycle + 0.157690889 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 2930) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127811e-06 Avg ME (F77/C++) = 8.1278105271212486E-006 Relative difference = 5.8180333155894157e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.248007e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.252977e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.252977e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.217964e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.222354e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.222354e+04 ) sec^-1 MeanMatrixElemValue = ( 7.177152e-04 +- 6.554185e-04 ) GeV^-4 -TOTAL : 0.043577 sec +TOTAL : 0.044518 sec INFO: No Floating Point Exceptions have been reported - 130,400,124 cycles # 2.789 GHz - 367,239,733 instructions # 2.82 insn per cycle - 0.047312650 seconds time elapsed + 129,447,390 cycles # 2.729 GHz + 367,192,934 instructions # 2.84 insn per cycle + 0.047990838 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4:10124) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point 
Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127809e-06 Avg ME (F77/C++) = 8.1278090510674588E-006 Relative difference = 6.2830535070193674e-09 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.757273e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.781001e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.781001e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.769546e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.792490e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.792490e+04 ) sec^-1 MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.020362 sec +TOTAL : 0.020349 sec INFO: No Floating Point Exceptions have been reported - 63,133,267 cycles # 2.675 GHz - 138,046,859 instructions # 2.19 insn per cycle - 0.024144110 seconds time elapsed + 62,145,033 cycles # 2.684 GHz + 138,048,264 instructions # 2.22 insn per cycle + 0.023682982 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 9205) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275366216540664E-006 Relative difference = 4.655111786058001e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.124081e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.151751e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.151751e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.058079e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.086570e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.086570e+04 ) sec^-1 MeanMatrixElemValue = ( 7.165746e-04 +- 6.542823e-04 ) GeV^-4 -TOTAL : 0.018102 sec +TOTAL : 0.018477 sec INFO: No Floating Point Exceptions have been reported - 57,980,217 cycles # 2.710 GHz - 127,982,392 instructions # 2.21 insn per cycle - 0.021901107 seconds time elapsed + 56,677,502 cycles # 2.660 GHz + 127,963,925 instructions # 2.26 insn per cycle + 0.021825959 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8919) (512y: 28) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275366216540664E-006 Relative difference = 4.655111786058001e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.313423e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.333536e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.333536e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.337142e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.358958e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.358958e+04 ) sec^-1 MeanMatrixElemValue = ( 7.165747e-04 +- 6.542824e-04 ) GeV^-4 -TOTAL : 0.024267 sec +TOTAL : 0.023953 sec INFO: No Floating Point Exceptions have been reported - 50,462,260 cycles # 1.826 GHz - 74,803,342 instructions # 1.48 insn per cycle - 0.028209078 seconds time elapsed + 48,824,483 cycles # 1.820 GHz + 74,785,723 instructions # 1.53 insn per cycle + 0.027430916 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2789) (512y: 32) (512z: 7444) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127537e-06 Avg ME (F77/C++) = 8.1275369863475849E-006 Relative difference = 1.6797726498700304e-09 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt index 2890a42304..c9ae973486 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-09-02_00:03:03 +DATE: 2024-09-15_12:19:30 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.776343e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.792993e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.795961e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.749294e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.767595e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.770609e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.466388 sec +TOTAL : 0.467392 sec INFO: No Floating Point Exceptions have been reported - 2,004,927,949 cycles # 2.915 GHz - 2,963,573,449 instructions # 1.48 insn per cycle - 0.745240545 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1 + 1,983,595,553 cycles # 2.874 GHz + 2,922,486,219 instructions # 1.47 insn per cycle + 0.746529670 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.018693e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.125822e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.133051e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.927630e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.040034e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.047831e+05 ) sec^-1 MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.480949 sec +TOTAL : 0.485867 sec INFO: No Floating Point Exceptions have been reported - 2,053,433,685 cycles # 2.916 GHz - 3,058,996,758 instructions # 1.49 insn per cycle - 0.761626110 seconds time elapsed + 2,031,462,606 cycles # 2.875 GHz + 3,037,983,552 instructions # 1.50 insn per cycle + 0.765937206 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 Avg ME (F77/GPU) = 8.1274562879405200E-006 Relative difference = 3.3369094561706885e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.464561e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.467953e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.467953e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.382949e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.386200e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.386200e+03 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.154764 sec +TOTAL : 0.158173 sec INFO: No Floating Point Exceptions have been reported - 472,532,900 cycles # 2.991 GHz - 1,398,413,020 instructions # 2.96 insn per cycle - 0.158536821 seconds time elapsed + 471,387,733 cycles # 2.929 GHz + 1,398,281,899 instructions # 2.97 insn per cycle + 0.161473463 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3899) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562948736117E-006 Relative difference = 3.32837900190667e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.897064e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.911051e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.911051e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.673807e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.686050e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.686050e+03 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.078635 sec +TOTAL : 0.080997 sec INFO: No Floating Point Exceptions have been reported - 236,584,738 cycles # 2.891 GHz - 688,026,328 instructions # 2.91 insn per cycle - 0.082347743 seconds time elapsed + 235,160,008 cycles # 2.808 GHz + 688,033,850 instructions # 2.93 insn per cycle + 0.084339129 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 9328) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563175290919E-006 Relative difference = 3.3005037703909805e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.484530e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.491505e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.491505e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.415459e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.422136e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.422136e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.037635 sec +TOTAL : 0.039362 sec INFO: No Floating Point Exceptions have been reported - 113,465,558 cycles # 2.779 GHz - 253,078,905 instructions # 2.23 insn per cycle - 0.041334338 seconds time elapsed + 112,339,380 cycles # 2.665 GHz + 253,052,093 instructions # 2.25 insn per cycle + 0.042695307 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8363) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.662894e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.670459e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.670459e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.648852e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.656658e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.656658e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.033766 sec +TOTAL : 0.034079 sec INFO: No Floating Point Exceptions have been reported - 101,027,313 cycles # 2.738 GHz - 233,681,045 instructions # 2.31 insn per cycle - 0.037444704 seconds time elapsed + 100,217,114 cycles # 2.715 GHz + 233,607,212 instructions # 2.33 insn per cycle + 0.037476380 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7501) (512y: 146) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.257642e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.262778e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.262778e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.192314e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.197366e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.197366e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.044140 sec +TOTAL : 0.046499 sec INFO: No Floating Point Exceptions have been reported - 90,884,685 cycles # 1.918 GHz - 133,253,680 instructions # 1.47 insn per cycle - 0.048030387 seconds time elapsed + 89,493,670 cycles # 1.812 GHz + 133,128,515 instructions # 1.49 insn per cycle + 0.049962595 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2084) (512y: 122) (512z: 6356) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 diff --git a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt index 83e47710ab..1d81f994cb 100644 --- a/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_smeftggtttt_mad/log_smeftggtttt_mad_m_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,73 +11,73 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx' -DATE: 2024-09-02_00:03:14 +DATE: 2024-09-15_12:19:41 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 2.806214e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.825154e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.830557e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.765961e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.783708e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.789350e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.465671 sec +TOTAL : 0.474700 sec INFO: No Floating Point Exceptions have been reported - 2,001,070,781 cycles # 2.911 GHz - 2,925,166,809 instructions # 1.46 insn per cycle - 0.744689271 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1 + 1,974,719,372 cycles # 2.833 GHz + 2,899,642,626 instructions # 1.47 insn per cycle + 0.754986373 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 1 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 255 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ......................................................................... -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 64 256 1 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 7.157504e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.254965e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.263055e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.058757e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.171730e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.179415e+05 ) sec^-1 MeanMatrixElemValue = ( 8.048215e-03 +- 4.042405e-03 ) GeV^-4 -TOTAL : 0.485337 sec +TOTAL : 0.484755 sec INFO: No Floating Point Exceptions have been reported - 2,060,668,286 cycles # 2.923 GHz - 3,043,410,338 instructions # 1.48 insn per cycle - 0.766773978 seconds time elapsed + 2,032,400,386 cycles # 2.878 GHz + 3,034,442,470 instructions # 1.49 insn per cycle + 0.765490241 seconds time elapsed ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -85,33 +85,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 8.127459e-06 Avg ME (F77/GPU) = 8.1274562879405200E-006 Relative difference = 3.3369094561706885e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 3.456469e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.459995e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.459995e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.419840e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.423095e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.423095e+03 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.154174 sec +TOTAL : 0.155760 sec INFO: No Floating Point Exceptions have been reported - 468,802,292 cycles # 2.980 GHz - 1,393,811,967 instructions # 2.97 insn per cycle - 0.157901348 seconds time elapsed + 467,249,665 cycles # 2.946 GHz + 1,393,566,061 instructions # 2.98 insn per cycle + 0.159156822 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 3800) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -119,31 +119,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274562948736117E-006 Relative difference = 3.32837900190667e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.799064e+03 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.811422e+03 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.811422e+03 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.647634e+03 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.659890e+03 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.659890e+03 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.078911 sec +TOTAL : 0.080628 sec INFO: No Floating Point Exceptions have been reported - 235,085,555 cycles # 2.865 GHz - 684,192,560 instructions # 2.91 insn per cycle - 0.082631566 seconds time elapsed + 234,377,416 cycles # 2.808 GHz + 684,139,763 instructions # 2.92 insn per cycle + 0.083918243 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 9361) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions 
will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -151,31 +151,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563175290919E-006 Relative difference = 3.3005037703909805e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.457620e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.463500e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.463500e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.444361e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.450464e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.450464e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.037503 sec +TOTAL : 0.037847 sec INFO: No Floating Point Exceptions have been reported - 111,107,100 cycles # 2.733 GHz - 248,710,507 instructions # 2.24 insn per cycle - 0.041153089 seconds time elapsed + 110,057,998 cycles # 2.704 GHz + 248,602,467 instructions # 2.26 insn per cycle + 0.041225455 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 8316) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -183,31 +183,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.611440e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.619238e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.619238e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.658647e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.666343e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.666343e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.034022 sec +TOTAL : 0.033122 sec INFO: No Floating Point Exceptions have been reported - 103,066,093 cycles # 2.763 GHz - 229,322,184 instructions # 2.23 insn per cycle - 0.037860220 seconds time elapsed + 97,824,445 cycles # 2.731 GHz + 229,151,030 instructions # 2.34 insn per cycle + 0.036353420 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 7452) (512y: 146) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
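The EvtsPerSec lines are the figures of merit in these tput logs: each '-'/'+' pair is the same build rerun on 2024-09-02 and 2024-09-15, and the diff shows run-to-run throughput drift (here the 512y hrd1 build happens to come out ~3% faster). A hypothetical helper, not part of the repository, that pulls the MatrixElems throughput out of a log so such shifts can be quantified:

import re

# Matches e.g. "EvtsPerSec[MatrixElems] (3) = ( 1.666343e+04 ) sec^-1"
ME_RE = re.compile(r"EvtsPerSec\[MatrixElems\]\s*\(3\)\s*=\s*\(\s*([0-9.eE+-]+)\s*\)")

def matrix_elem_throughputs(log_text: str) -> list[float]:
    """Return all MatrixElems throughputs (events/sec) found in a log."""
    return [float(m.group(1)) for m in ME_RE.finditer(log_text)]

# Example with the 512y hrd1 numbers above (old run vs new run):
old, new = 1.619238e+04, 1.666343e+04
print(f"512y change: {100.0 * (new / old - 1.0):+.1f}%")  # ~ +2.9%
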
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -215,31 +215,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 1 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_SMEFTSIM_TOPU3L_MWSCHEME_UFO_GG_TTXTTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.232630e+04 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.237560e+04 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.237560e+04 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.188861e+04 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.193842e+04 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.193842e+04 ) sec^-1 MeanMatrixElemValue = ( 7.185537e-04 +- 6.562553e-04 ) GeV^-4 -TOTAL : 0.044312 sec +TOTAL : 0.046038 sec INFO: No Floating Point Exceptions have been reported - 88,617,933 cycles # 1.864 GHz - 128,655,107 instructions # 1.45 insn per cycle - 0.048048536 seconds time elapsed + 87,629,988 cycles # 1.795 GHz + 128,556,729 instructions # 1.47 insn per cycle + 0.049600721 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2035) (512y: 122) (512z: 6356) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
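The '=Symbols in CPPProcess_cpp.o=' lines tally how many disassembled instructions in the object file use each SIMD flavour, which is how the logs confirm that e.g. a 512z build really emits AVX512 512-bit code (6356 zmm instructions above) rather than falling back to AVX2. A rough, hypothetical approximation of that tally, classifying instructions by the widest vector register they touch; the repository's own script is more refined (for instance it separates '512y' AVX512VL code on ymm registers from plain AVX2), which this sketch cannot do:

import collections
import subprocess

def vector_register_histogram(objfile: str) -> collections.Counter:
    """Count disassembled instructions per vector-register width."""
    asm = subprocess.run(["objdump", "-d", objfile],
                         capture_output=True, text=True, check=True).stdout
    hist = collections.Counter()
    for line in asm.splitlines():
        if "%zmm" in line:
            hist["zmm (512-bit)"] += 1
        elif "%ymm" in line:
            hist["ymm (256-bit)"] += 1
        elif "%xmm" in line:
            hist["xmm (128-bit)"] += 1
    return hist

print(vector_register_histogram("CPPProcess_cpp.o"))
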
DEBUG: MEK (no multichannel) processed 512 events across 72 channels { no-multichannel : 512 } @@ -247,8 +247,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 72 channels { 1 : 32, 2 : 32, 3 : 32, 4 : 32, 5 : 32, 6 : 32, 7 : 32, 8 : 32, 9 : 32, 10 : 32, 11 : 32, 12 : 32, 13 : 32, 14 : 32, 15 : 32, 16 : 32 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/smeft_gg_tttt.mad/SubProcesses/P1_gg_ttxttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 8.127459e-06 Avg ME (F77/C++) = 8.1274563450143301E-006 Relative difference = 3.266686019634872e-07 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt index 617b4177f7..808bf6828b 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-09-02_00:01:09 +DATE: 2024-09-15_12:17:33 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.705939e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.441412e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.886849e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.071674e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.333003e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.756234e+08 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.513831 sec +TOTAL : 0.519163 sec INFO: No Floating Point Exceptions have been reported - 2,172,306,307 cycles # 2.920 GHz - 3,124,659,871 instructions # 1.44 insn per cycle - 0.802671698 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 + 2,179,162,165 cycles # 2.882 GHz + 3,070,881,799 instructions # 1.41 insn per cycle + 0.812256060 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 130 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.477196e-01 Avg ME (F77/GPU) = 0.14771956172964262 Relative difference = 2.590743366698123e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.271347e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.056940e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.056940e+06 ) sec^-1 
+EvtsPerSec[Rmb+ME] (23) = ( 8.736392e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.961242e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.961242e+05 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 1.241787 sec +TOTAL : 1.358463 sec INFO: No Floating Point Exceptions have been reported - 3,734,524,562 cycles # 2.996 GHz - 9,729,186,535 instructions # 2.61 insn per cycle - 1.247040130 seconds time elapsed + 3,905,907,731 cycles # 2.851 GHz + 9,863,781,254 instructions # 2.53 insn per cycle + 1.371009162 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 341) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.526302e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.956278e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.956278e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.459599e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.873378e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.873378e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.798035 sec +TOTAL : 0.877453 sec INFO: No Floating Point Exceptions 
have been reported - 2,324,928,559 cycles # 2.897 GHz - 5,934,379,701 instructions # 2.55 insn per cycle - 0.803329705 seconds time elapsed + 2,486,018,663 cycles # 2.796 GHz + 6,068,811,134 instructions # 2.44 insn per cycle + 0.890013058 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1369) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.269779e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.324433e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.324433e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.202336e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.241982e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.241982e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.572529 sec +TOTAL : 0.633614 sec INFO: No Floating Point Exceptions have been reported - 1,662,682,093 cycles # 2.881 GHz - 3,316,788,109 instructions # 1.99 insn per cycle - 0.577756787 seconds time elapsed + 1,818,277,006 cycles # 2.816 GHz + 3,450,832,845 instructions # 1.90 insn per cycle + 0.646259584 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) 
(avx2: 1499) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.280934e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.365920e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.365920e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.283565e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.391222e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.391222e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.571816 sec +TOTAL : 0.617501 sec INFO: No Floating Point Exceptions have been reported - 1,616,508,739 cycles # 2.804 GHz - 3,286,225,269 instructions # 2.03 insn per cycle - 0.577076218 seconds time elapsed + 1,780,688,704 cycles # 2.829 GHz + 3,420,263,634 instructions # 1.92 insn per cycle + 0.630172459 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1375) (512y: 96) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.213094e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.193189e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.193189e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.121818e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.051040e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.051040e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.586010 sec +TOTAL : 0.653754 sec INFO: No Floating Point Exceptions have been reported - 1,368,385,365 cycles # 2.316 GHz - 2,425,788,088 instructions # 1.77 insn per cycle - 0.591541683 seconds time elapsed + 1,527,075,900 cycles # 2.294 GHz + 2,560,289,188 instructions # 1.68 insn per cycle + 0.666212420 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 580) (512y: 60) (512z: 1021) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
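The perf-style counter lines above are internally consistent and easy to re-derive: 'insn per cycle' is simply instructions divided by cycles (the GHz figure comes from perf's task clock, so recomputing it from the elapsed time shown here is only approximate). A one-liner checking the 512z susy_gg_t1t1 counters above:

cycles = 1_527_075_900
instructions = 2_560_289_188
print(f"{instructions / cycles:.2f} insn per cycle")  # -> 1.68, as printed
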
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt index b0f30d9af2..06cbb3e926 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_d_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-09-02_00:01:21 +DATE: 2024-09-15_12:17:46 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.832298e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.616304e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.132808e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.969739e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.449933e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.971179e+08 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.513767 sec +TOTAL : 0.523956 sec INFO: No Floating Point Exceptions have been reported - 2,171,398,785 cycles # 2.919 GHz - 3,102,319,188 instructions # 1.43 insn per cycle - 0.802857401 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 + 2,197,273,631 cycles # 2.867 GHz + 3,116,260,127 instructions # 1.42 insn per cycle + 0.822598423 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.477196e-01 Avg ME (F77/GPU) = 0.14771956172964262 Relative difference = 2.590743366698123e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.288371e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.060959e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.060959e+06 ) sec^-1 
+EvtsPerSec[Rmb+ME] (23) = ( 9.011162e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.031790e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.031790e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 1.237288 sec +TOTAL : 1.321281 sec INFO: No Floating Point Exceptions have been reported - 3,712,284,308 cycles # 2.989 GHz - 9,611,420,819 instructions # 2.59 insn per cycle - 1.242501765 seconds time elapsed + 3,893,594,822 cycles # 2.920 GHz + 9,744,555,445 instructions # 2.50 insn per cycle + 1.334263922 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 359) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.474254e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.878832e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.878832e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.381914e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.838565e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.838565e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.822207 sec +TOTAL : 0.917902 sec INFO: No Floating Point Exceptions 
have been reported - 2,343,690,158 cycles # 2.835 GHz - 5,879,796,075 instructions # 2.51 insn per cycle - 0.827511338 seconds time elapsed + 2,659,815,270 cycles # 2.862 GHz + 6,026,660,919 instructions # 2.27 insn per cycle + 0.930359460 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1335) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.220023e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.260647e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.260647e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.192597e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.249251e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.249251e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.585099 sec +TOTAL : 0.636612 sec INFO: No Floating Point Exceptions have been reported - 1,645,862,133 cycles # 2.790 GHz - 3,288,630,162 instructions # 2.00 insn per cycle - 0.590634091 seconds time elapsed + 1,830,188,885 cycles # 2.821 GHz + 3,421,758,036 instructions # 1.87 insn per cycle + 0.649438298 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) 
(avx2: 1436) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.298542e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.412196e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.412196e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.271531e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.393290e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.393290e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.568844 sec +TOTAL : 0.619901 sec INFO: No Floating Point Exceptions have been reported - 1,610,064,623 cycles # 2.808 GHz - 3,262,929,746 instructions # 2.03 insn per cycle - 0.574076244 seconds time elapsed + 1,782,873,131 cycles # 2.820 GHz + 3,395,941,059 instructions # 1.90 insn per cycle + 0.632832806 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1328) (512y: 96) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.203373e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.175027e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.175027e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.134142e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.070820e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.070820e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.587261 sec +TOTAL : 0.650853 sec INFO: No Floating Point Exceptions have been reported - 1,373,579,028 cycles # 2.321 GHz - 2,411,054,137 instructions # 1.76 insn per cycle - 0.592531349 seconds time elapsed + 1,547,796,135 cycles # 2.334 GHz + 2,545,431,106 instructions # 1.64 insn per cycle + 0.664117617 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 547) (512y: 60) (512z: 1007) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956172964268 Relative difference = 2.59074336294025e-07 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt index ae21b45fb2..6bbc9fb0da 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-09-02_00:01:34 +DATE: 2024-09-15_12:17:58 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.172680e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.277446e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.670009e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.000684e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.992629e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.388281e+08 ) sec^-1 MeanMatrixElemValue = ( 1.486732e-01 +- 3.293572e-05 ) GeV^0 -TOTAL : 0.476855 sec +TOTAL : 0.488697 sec INFO: No Floating Point Exceptions have been reported - 2,080,048,082 cycles # 2.923 GHz - 2,934,470,906 instructions # 1.41 insn per cycle - 0.769357298 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 + 2,047,674,909 cycles # 2.864 GHz + 2,921,802,724 instructions # 1.43 insn per cycle + 0.773667864 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 97 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.477195e-01 Avg ME (F77/GPU) = 0.14771956735057756 Relative difference = 4.559355911674916e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.341262e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.072834e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.072834e+06 ) sec^-1 
+EvtsPerSec[Rmb+ME] (23) = ( 9.029870e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.042886e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.042886e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 -TOTAL : 1.210080 sec +TOTAL : 1.272870 sec INFO: No Floating Point Exceptions have been reported - 3,649,824,194 cycles # 3.005 GHz - 9,602,319,220 instructions # 2.63 insn per cycle - 1.215028903 seconds time elapsed + 3,752,313,957 cycles # 2.930 GHz + 9,659,106,684 instructions # 2.57 insn per cycle + 1.281538641 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 463) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956094773486 Relative difference = 2.643675256627469e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.253255e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.405386e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.405386e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.197021e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.333033e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.333033e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 -TOTAL : 0.552399 sec +TOTAL : 0.588107 sec INFO: No Floating Point Exceptions 
have been reported - 1,632,093,899 cycles # 2.932 GHz - 3,968,324,524 instructions # 2.43 insn per cycle - 0.557382299 seconds time elapsed + 1,715,029,446 cycles # 2.877 GHz + 4,025,277,973 instructions # 2.35 insn per cycle + 0.596790312 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1579) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771955861942843 Relative difference = 2.80129187869649e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.047638e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.394338e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.394338e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.961392e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.263984e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.263984e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 -TOTAL : 0.429395 sec +TOTAL : 0.463961 sec INFO: No Floating Point Exceptions have been reported - 1,250,766,491 cycles # 2.884 GHz - 2,498,057,764 instructions # 2.00 insn per cycle - 0.434425808 seconds time elapsed + 1,335,854,072 cycles # 2.831 GHz + 2,555,445,671 instructions # 1.91 insn per cycle + 0.472547002 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) 
(avx2: 1924) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771955698961392 Relative difference = 2.9116235141448046e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.179958e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.709899e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.709899e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.064902e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.593039e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.593039e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 -TOTAL : 0.416325 sec +TOTAL : 0.451960 sec INFO: No Floating Point Exceptions have been reported - 1,219,121,155 cycles # 2.899 GHz - 2,472,105,838 instructions # 2.03 insn per cycle - 0.421331232 seconds time elapsed + 1,308,601,918 cycles # 2.845 GHz + 2,529,434,362 instructions # 1.93 insn per cycle + 0.460618771 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1867) (512y: 1) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771955698961392 Relative difference = 2.9116235141448046e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.995188e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.099116e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.099116e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.884808e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.904943e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.904943e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486735e-01 +- 3.293561e-05 ) GeV^0 -TOTAL : 0.438175 sec +TOTAL : 0.475883 sec INFO: No Floating Point Exceptions have been reported - 1,071,487,340 cycles # 2.420 GHz - 2,074,189,485 instructions # 1.94 insn per cycle - 0.443244948 seconds time elapsed + 1,154,211,341 cycles # 2.384 GHz + 2,131,381,757 instructions # 1.85 insn per cycle + 0.484642507 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1011) (512y: 5) (512z: 1292) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771955262403935 Relative difference = 3.207154680524219e-07 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt index 3f432b655a..5f533fb3cd 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_f_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-09-02_00:01:45 +DATE: 2024-09-15_12:18:10 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 1.154808e+08 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.194582e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.565830e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.019765e+08 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.955728e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.339790e+08 ) sec^-1 MeanMatrixElemValue = ( 1.486732e-01 +- 3.293572e-05 ) GeV^0 -TOTAL : 0.479664 sec +TOTAL : 0.485800 sec INFO: No Floating Point Exceptions have been reported - 2,066,272,973 cycles # 2.913 GHz - 2,959,541,266 instructions # 1.43 insn per cycle - 0.768366251 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 + 2,046,836,184 cycles # 2.868 GHz + 2,861,763,521 instructions # 1.40 insn per cycle + 0.770456449 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 86 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.477195e-01 Avg ME (F77/GPU) = 0.14771956525510177 Relative difference = 4.4175008557828484e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.348696e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.078278e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.078278e+06 ) sec^-1 
+EvtsPerSec[Rmb+ME] (23) = ( 9.176079e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.058039e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.058039e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 -TOTAL : 1.208511 sec +TOTAL : 1.251903 sec INFO: No Floating Point Exceptions have been reported - 3,615,745,577 cycles # 2.981 GHz - 9,472,516,585 instructions # 2.62 insn per cycle - 1.213452227 seconds time elapsed + 3,703,836,740 cycles # 2.940 GHz + 9,528,821,992 instructions # 2.57 insn per cycle + 1.260572218 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 367) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956094773486 Relative difference = 2.643675256627469e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.256979e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.415783e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.415783e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.192175e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.322296e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.322296e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486735e-01 +- 3.293563e-05 ) GeV^0 -TOTAL : 0.551273 sec +TOTAL : 0.587889 sec INFO: No Floating Point Exceptions 
have been reported - 1,631,110,236 cycles # 2.935 GHz - 3,934,262,602 instructions # 2.41 insn per cycle - 0.556359995 seconds time elapsed + 1,712,573,858 cycles # 2.874 GHz + 3,991,164,090 instructions # 2.33 insn per cycle + 0.596469979 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1517) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771955861942843 Relative difference = 2.80129187869649e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.079393e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.400939e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.400939e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.984131e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.291891e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.291891e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 -TOTAL : 0.425944 sec +TOTAL : 0.460621 sec INFO: No Floating Point Exceptions have been reported - 1,245,519,260 cycles # 2.895 GHz - 2,482,547,459 instructions # 1.99 insn per cycle - 0.430910478 seconds time elapsed + 1,332,768,943 cycles # 2.844 GHz + 2,539,760,549 instructions # 1.91 insn per cycle + 0.469223881 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) 
(avx2: 1815) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771955698961392 Relative difference = 2.9116235141448046e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.175230e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.704334e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.704334e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.068338e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.608348e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.608348e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486735e-01 +- 3.293562e-05 ) GeV^0 -TOTAL : 0.415401 sec +TOTAL : 0.451660 sec INFO: No Floating Point Exceptions have been reported - 1,217,462,881 cycles # 2.901 GHz - 2,459,454,464 instructions # 2.02 insn per cycle - 0.420406157 seconds time elapsed + 1,303,705,490 cycles # 2.835 GHz + 2,516,660,988 instructions # 1.93 insn per cycle + 0.460426647 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1776) (512y: 1) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771955698961392 Relative difference = 2.9116235141448046e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.042589e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.197180e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.197180e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.904674e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.952335e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.952335e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486735e-01 +- 3.293561e-05 ) GeV^0 -TOTAL : 0.431010 sec +TOTAL : 0.470523 sec INFO: No Floating Point Exceptions have been reported - 1,065,793,790 cycles # 2.448 GHz - 2,057,938,519 instructions # 1.93 insn per cycle - 0.435976515 seconds time elapsed + 1,148,816,748 cycles # 2.401 GHz + 2,115,600,264 instructions # 1.84 insn per cycle + 0.478989217 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 906) (512y: 5) (512z: 1273) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
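A minimal sketch (not part of the logs) of how the derived figures on the "perf stat" lines above follow from the raw counters, using the numbers from the build.512z_f_inl0_hrd1 run. Note that perf computes GHz against task-clock time, so dividing cycles by the elapsed wall-clock time shown here only approximates the printed value.

# Assumed values copied from the 512z_f_inl0_hrd1 perf output above.
cycles = 1_148_816_748
instructions = 2_115_600_264
elapsed_s = 0.478989217

insn_per_cycle = instructions / cycles      # ~1.84, as printed
approx_ghz = cycles / elapsed_s / 1e9       # ~2.40 vs the printed 2.401 (task-clock vs wall-clock)
print(f"{insn_per_cycle:.2f} insn per cycle, ~{approx_ghz:.3f} GHz")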
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771955262403935 Relative difference = 3.207154680524219e-07 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt index b59eca5eef..45ada3a90e 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-09-02_00:01:56 +DATE: 2024-09-15_12:18:21 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.711022e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.444825e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.891145e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.081665e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.353918e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.800720e+08 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.510096 sec +TOTAL : 0.522074 sec INFO: No Floating Point Exceptions have been reported - 2,197,599,535 cycles # 2.926 GHz - 3,114,723,183 instructions # 1.42 insn per cycle - 0.808466821 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 + 2,188,417,927 cycles # 2.886 GHz + 3,109,980,535 instructions # 1.42 insn per cycle + 0.814803667 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 130 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.477196e-01 Avg ME (F77/GPU) = 0.14771956187351573 Relative difference = 2.5810037581511336e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.059352e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.033640e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.033640e+06 ) sec^-1 
+EvtsPerSec[Rmb+ME] (23) = ( 8.910495e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.017920e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.017920e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 1.268650 sec +TOTAL : 1.334640 sec INFO: No Floating Point Exceptions have been reported - 3,771,710,829 cycles # 2.962 GHz - 9,754,505,592 instructions # 2.59 insn per cycle - 1.273897417 seconds time elapsed + 3,942,717,867 cycles # 2.929 GHz + 9,888,397,619 instructions # 2.51 insn per cycle + 1.346816311 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 341) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956645541506 Relative difference = 2.270828308707201e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.558332e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.007967e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.007967e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.550381e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.026476e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.026476e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.783008 sec +TOTAL : 0.835395 sec INFO: No Floating Point Exceptions 
have been reported - 2,310,402,593 cycles # 2.933 GHz - 5,917,549,361 instructions # 2.56 insn per cycle - 0.788339620 seconds time elapsed + 2,474,407,927 cycles # 2.921 GHz + 6,051,781,084 instructions # 2.45 insn per cycle + 0.847852996 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1410) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956645541506 Relative difference = 2.270828308707201e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.331953e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.457739e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.457739e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.251869e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.352067e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.352067e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.560431 sec +TOTAL : 0.625126 sec INFO: No Floating Point Exceptions have been reported - 1,634,630,269 cycles # 2.893 GHz - 3,254,908,835 instructions # 1.99 insn per cycle - 0.565744522 seconds time elapsed + 1,795,351,792 cycles # 2.819 GHz + 3,389,782,871 instructions # 1.89 insn per cycle + 0.637929251 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) 
(avx2: 1567) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956674392650 Relative difference = 2.2512972893324335e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.370507e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.529509e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.529509e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.324637e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.489814e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.489814e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.553314 sec +TOTAL : 0.609151 sec INFO: No Floating Point Exceptions have been reported - 1,598,397,792 cycles # 2.865 GHz - 3,210,982,116 instructions # 2.01 insn per cycle - 0.558573463 seconds time elapsed + 1,759,711,411 cycles # 2.834 GHz + 3,345,109,138 instructions # 1.90 insn per cycle + 0.621588850 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1446) (512y: 101) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956674392650 Relative difference = 2.2512972893324335e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.239065e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.276775e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.276775e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.155541e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.119089e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.119089e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.581191 sec +TOTAL : 0.645680 sec INFO: No Floating Point Exceptions have been reported - 1,341,912,926 cycles # 2.290 GHz - 2,377,902,713 instructions # 1.77 insn per cycle - 0.586599513 seconds time elapsed + 1,520,276,942 cycles # 2.311 GHz + 2,512,095,426 instructions # 1.65 insn per cycle + 0.658351218 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 768) (512y: 64) (512z: 1063) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
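A minimal sketch of the cross-check behind the "OK (relative difference <= 5E-3)" verdicts printed by the cmpExe steps, using the 512y_m_inl0_hrd0 values above. The choice of the C++ average ME as the reference in the denominator is an assumption, but it reproduces the printed figure.

me_cpp = 1.477196e-01            # Avg ME (C++/C++), as logged
me_f77 = 0.14771956674392650     # Avg ME (F77/C++), as logged
rel_diff = abs(me_f77 - me_cpp) / abs(me_cpp)
assert rel_diff <= 5e-3          # tolerance quoted in the logs
print(f"Relative difference = {rel_diff:.16e}")  # ~2.2512972893e-07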
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956674392650 Relative difference = 2.2512972893324335e-07 diff --git a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt index 0761d80f6f..635fef145f 100644 --- a/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggt1t1_mad/log_susyggt1t1_mad_m_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x' -DATE: 2024-09-02_00:02:08 +DATE: 2024-09-15_12:18:34 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 6.786230e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.554941e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.045306e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.163952e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.460728e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.975996e+08 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.511925 sec +TOTAL : 0.521235 sec INFO: No Floating Point Exceptions have been reported - 2,190,313,449 cycles # 2.920 GHz - 3,134,280,951 instructions # 1.43 insn per cycle - 0.807505561 seconds time elapsed -runNcu 
/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 + 2,178,235,183 cycles # 2.877 GHz + 3,088,126,574 instructions # 1.42 insn per cycle + 0.814490194 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 1.477196e-01 Avg ME (F77/GPU) = 0.14771956187351573 Relative difference = 2.5810037581511336e-07 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.hip_m_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 9.171250e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.046529e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.046529e+06 ) sec^-1 
+EvtsPerSec[Rmb+ME] (23) = ( 8.905010e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.017534e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.017534e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 1.252183 sec +TOTAL : 1.334358 sec INFO: No Floating Point Exceptions have been reported - 3,761,080,220 cycles # 2.993 GHz - 9,645,091,040 instructions # 2.56 insn per cycle - 1.257461532 seconds time elapsed + 3,930,631,045 cycles # 2.921 GHz + 9,778,615,750 instructions # 2.49 insn per cycle + 1.346795690 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 359) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956645541506 Relative difference = 2.270828308707201e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 1.525381e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.962903e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.962903e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.520527e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.978156e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.978156e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.797796 sec +TOTAL : 0.848228 sec INFO: No Floating Point Exceptions 
have been reported - 2,292,955,540 cycles # 2.859 GHz - 5,860,568,229 instructions # 2.56 insn per cycle - 0.802900333 seconds time elapsed + 2,460,773,168 cycles # 2.862 GHz + 5,993,984,003 instructions # 2.44 insn per cycle + 0.860657174 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 1368) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956645541506 Relative difference = 2.270828308707201e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.268434e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.328040e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.328040e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.233001e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.325225e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.325225e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.572092 sec +TOTAL : 0.627322 sec INFO: No Floating Point Exceptions have been reported - 1,644,248,919 cycles # 2.852 GHz - 3,218,953,769 instructions # 1.96 insn per cycle - 0.577145152 seconds time elapsed + 1,810,072,132 cycles # 2.832 GHz + 3,352,499,816 instructions # 1.85 insn per cycle + 0.639697989 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) 
(avx2: 1483) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956674392650 Relative difference = 2.2512972893324335e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.381298e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.582145e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.582145e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.331445e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.508166e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.508166e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.551151 sec +TOTAL : 0.606762 sec INFO: No Floating Point Exceptions have been reported - 1,601,842,641 cycles # 2.882 GHz - 3,182,853,937 instructions # 1.99 insn per cycle - 0.556446747 seconds time elapsed + 1,747,202,335 cycles # 2.825 GHz + 3,316,993,487 instructions # 1.90 insn per cycle + 0.619170203 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1382) (512y: 101) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956674392650 Relative difference = 2.2512972893324335e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_T1T1X_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 2.271455e+06 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.309008e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.309008e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 2.144157e+06 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.097547e+06 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.097547e+06 ) sec^-1 MeanMatrixElemValue = ( 1.486736e-01 +- 3.293564e-05 ) GeV^0 -TOTAL : 0.572096 sec +TOTAL : 0.647875 sec INFO: No Floating Point Exceptions have been reported - 1,360,273,199 cycles # 2.358 GHz - 2,361,960,995 instructions # 1.74 insn per cycle - 0.577285951 seconds time elapsed + 1,527,422,709 cycles # 2.315 GHz + 2,496,191,682 instructions # 1.63 insn per cycle + 0.660479795 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 716) (512y: 64) (512z: 1054) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
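A minimal sketch of the lane-count arithmetic behind the "Internal loops fptype_sv = VECTOR[n]" lines: n is the SIMD register width divided by the floating-point type width. This assumes fptype is 64-bit in the double and mixed-precision builds and 32-bit in the float builds, which matches every VECTOR[n] line in these logs (VECTOR[2]/[4]/[8] for mixed, VECTOR[8]/[16] for float; '512y' uses AVX512 instructions on 256-bit registers).

# Register widths per backend tag, as quoted in the log headers above.
register_bits = {"sse4": 128, "avx2": 256, "512y": 256, "512z": 512}
for backend, bits in register_bits.items():
    for fptype_bits, tag in ((64, "double/mixed"), (32, "float")):
        print(f"{backend}: VECTOR[{bits // fptype_bits}] ({tag})")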
DEBUG: MEK (no multichannel) processed 512 events across 6 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 6 channels { 2 : 128, 3 : 96, 4 : 96, 5 : 96, 6 : 96 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_t1t1.mad/SubProcesses/P1_gg_t1t1x/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 1.477196e-01 Avg ME (F77/C++) = 0.14771956674392650 Relative difference = 2.2512972893324335e-07 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt index aaac9a4e49..0f0996a4b7 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-01_23:58:46 +DATE: 2024-09-15_12:15:09 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.653027e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.340470e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.925372e+07 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.705596e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.093007e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.806904e+07 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 0.526809 sec +TOTAL : 0.547454 sec INFO: No Floating Point Exceptions have been reported - 2,213,151,288 cycles # 2.920 GHz - 3,205,651,873 instructions # 1.45 insn per cycle - 0.815245250 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 + 2,244,643,125 
cycles # 2.856 GHz + 3,194,753,552 instructions # 1.42 insn per cycle + 0.844381889 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.015836e+00 Avg ME (F77/GPU) = 2.0158358666195562 Relative difference = 6.616631711254798e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.860503e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.908123e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.908123e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.817695e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.865026e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.865026e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 
5.740977 sec +TOTAL : 5.917395 sec INFO: No Floating Point Exceptions have been reported - 17,253,451,365 cycles # 3.003 GHz - 45,921,355,629 instructions # 2.66 insn per cycle - 5.746162125 seconds time elapsed + 17,420,747,172 cycles # 2.939 GHz + 46,039,408,535 instructions # 2.64 insn per cycle + 5.929443281 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194407 Relative difference = 6.616637439061751e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.237581e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.395323e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.395323e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.177458e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.337417e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.337417e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.343336 sec +TOTAL : 3.449474 sec INFO: No Floating Point Exceptions have been reported - 10,011,623,439 cycles # 2.991 GHz - 27,803,676,769 instructions # 2.78 insn per cycle - 3.348521908 seconds time elapsed + 10,167,811,545 cycles # 2.940 GHz + 27,922,488,818 instructions # 2.75 insn per cycle + 3.461267593 seconds time elapsed =Symbols in 
CPPProcess_cpp.o= (~sse4: 2533) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194411 Relative difference = 6.616637417031725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.131063e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.533661e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.533661e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.001375e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.394642e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.394642e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.150729 sec +TOTAL : 2.246102 sec INFO: No Floating Point Exceptions have been reported - 6,070,393,401 cycles # 2.817 GHz - 12,586,845,951 instructions # 2.07 insn per cycle - 2.155997201 seconds time elapsed + 6,225,214,133 cycles # 2.758 GHz + 12,703,481,596 instructions # 2.04 insn per cycle + 2.257992148 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2620) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194953 Relative difference = 6.616634729368461e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.540655e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.005391e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.005391e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.452222e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.920256e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.920256e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 1.997882 sec +TOTAL : 2.070361 sec INFO: No Floating Point Exceptions have been reported - 5,577,536,285 cycles # 2.786 GHz - 12,004,229,035 instructions # 2.15 insn per cycle - 2.003159970 seconds time elapsed + 5,740,692,800 cycles # 2.758 GHz + 12,120,362,498 instructions # 2.11 insn per cycle + 2.082196362 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2363) (512y: 144) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194953 Relative difference = 6.616634729368461e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.615033e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.803174e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.803174e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.496236e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.681187e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.681187e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.007089 sec +TOTAL : 3.148263 sec INFO: No Floating Point Exceptions have been reported - 5,749,608,423 cycles # 1.909 GHz - 8,342,953,353 instructions # 1.45 insn per cycle - 3.012371924 seconds time elapsed + 5,893,231,770 cycles # 1.865 GHz + 8,460,083,225 instructions # 1.44 insn per cycle + 3.160116132 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1468) (512y: 122) (512z: 1806) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194953 Relative difference = 6.616634729368461e-08 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt index 378b21a230..b863aa4b8d 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_d_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-01_23:59:11 +DATE: 2024-09-15_12:15:34 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:DBL+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.662858e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.418241e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.001899e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.254248e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.331434e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.002046e+08 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 0.525518 sec +TOTAL : 0.532593 sec INFO: No Floating Point Exceptions have been reported - 2,132,676,957 cycles # 2.812 GHz - 3,121,749,820 instructions # 1.46 insn per cycle - 0.815010260 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 + 2,206,521,572 
cycles # 2.874 GHz + 3,181,038,873 instructions # 1.44 insn per cycle + 0.824867346 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 212 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_d_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.015836e+00 Avg ME (F77/GPU) = 2.0158358666195562 Relative difference = 6.616631711254798e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_d_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.908389e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.958750e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.958750e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.847951e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.897550e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.897550e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 
5.599275 sec +TOTAL : 5.824198 sec INFO: No Floating Point Exceptions have been reported - 16,763,422,918 cycles # 2.992 GHz - 44,907,263,948 instructions # 2.68 insn per cycle - 5.604556101 seconds time elapsed + 17,074,611,956 cycles # 2.927 GHz + 45,037,522,622 instructions # 2.64 insn per cycle + 5.835488505 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 566) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194411 Relative difference = 6.616637417031725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.408622e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.585143e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.585143e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.339846e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.517622e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.517622e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.182414 sec +TOTAL : 3.286951 sec INFO: No Floating Point Exceptions have been reported - 9,555,914,308 cycles # 2.999 GHz - 26,687,968,907 instructions # 2.79 insn per cycle - 3.187704655 seconds time elapsed + 9,688,702,526 cycles # 2.938 GHz + 26,805,473,197 instructions # 2.77 insn per cycle + 3.298888236 seconds time elapsed =Symbols in 
CPPProcess_cpp.o= (~sse4: 2327) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194411 Relative difference = 6.616637417031725e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.659733e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.989188e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.989188e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.556976e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.882070e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.882070e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.355005 sec +TOTAL : 2.448521 sec INFO: No Floating Point Exceptions have been reported - 6,586,794,349 cycles # 2.792 GHz - 14,110,405,758 instructions # 2.14 insn per cycle - 2.360343248 seconds time elapsed + 6,771,268,311 cycles # 2.753 GHz + 14,227,806,494 instructions # 2.10 insn per cycle + 2.460277833 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2704) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194953 Relative difference = 6.616634729368461e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.851705e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.216127e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.216127e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.776530e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.130549e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.130549e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.266798 sec +TOTAL : 2.341767 sec INFO: No Floating Point Exceptions have been reported - 6,332,630,844 cycles # 2.788 GHz - 13,705,013,933 instructions # 2.16 insn per cycle - 2.272035255 seconds time elapsed + 6,488,711,878 cycles # 2.758 GHz + 13,822,301,429 instructions # 2.13 insn per cycle + 2.353629315 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2355) (512y: 297) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194953 Relative difference = 6.616634729368461e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:DBL+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = DOUBLE (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.432050e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.603910e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.603910e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.365014e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.535721e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.535721e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.161293 sec +TOTAL : 3.263382 sec INFO: No Floating Point Exceptions have been reported - 5,942,810,149 cycles # 1.877 GHz - 10,101,972,904 instructions # 1.70 insn per cycle - 3.166381476 seconds time elapsed + 6,085,804,948 cycles # 1.859 GHz + 10,219,161,569 instructions # 1.68 insn per cycle + 3.275179492 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1318) (512y: 208) (512z: 1986) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_d_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158358666194953 Relative difference = 6.616634729368461e-08 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt index f9afc129d0..cf83c07d47 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-01_23:59:36 +DATE: 2024-09-15_12:16:00 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.048309e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.804245e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.916971e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.223710e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.732879e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.860967e+08 ) sec^-1 MeanMatrixElemValue = ( 2.072877e+00 +- 3.361153e-03 ) GeV^0 -TOTAL : 0.487622 sec +TOTAL : 0.489964 sec INFO: No Floating Point Exceptions have been reported - 2,070,419,388 cycles # 2.916 GHz - 2,949,604,523 instructions # 1.42 insn per cycle - 0.768904766 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 + 2,041,886,975 
cycles # 2.853 GHz + 2,932,689,889 instructions # 1.44 insn per cycle + 0.773926194 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 125 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.015841e+00 Avg ME (F77/GPU) = 2.0158787037944421 Relative difference = 1.870375413642407e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.966520e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 2.023235e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 2.023235e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.929290e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.984521e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.984521e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072937e+00 +- 3.361545e-03 ) GeV^0 -TOTAL : 
5.417892 sec +TOTAL : 5.538462 sec INFO: No Floating Point Exceptions have been reported - 16,207,150,412 cycles # 2.990 GHz - 45,319,225,618 instructions # 2.80 insn per cycle - 5.422848534 seconds time elapsed + 16,282,391,613 cycles # 2.936 GHz + 45,369,954,990 instructions # 2.79 insn per cycle + 5.546087919 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 600) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015849e+00 Avg ME (F77/C++) = 2.0158491701586172 Relative difference = 8.441039850630506e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.597400e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.936181e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.936181e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.517678e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.857555e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.857555e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072937e+00 +- 3.361544e-03 ) GeV^0 -TOTAL : 2.366627 sec +TOTAL : 2.425578 sec INFO: No Floating Point Exceptions have been reported - 7,070,935,943 cycles # 2.982 GHz - 17,771,231,538 instructions # 2.51 insn per cycle - 2.371927913 seconds time elapsed + 7,146,342,805 cycles # 2.938 GHz + 17,820,817,556 instructions # 2.49 insn per cycle + 2.433499088 seconds time elapsed =Symbols in 
CPPProcess_cpp.o= (~sse4: 3136) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015849e+00 Avg ME (F77/C++) = 2.0158486895961687 Relative difference = 1.539816876576819e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.462382e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.622186e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 9.622186e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.300139e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.447005e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.447005e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 1.325025 sec +TOTAL : 1.366410 sec INFO: No Floating Point Exceptions have been reported - 3,738,303,869 cycles # 2.812 GHz - 8,265,337,032 instructions # 2.21 insn per cycle - 1.330172400 seconds time elapsed + 3,812,530,133 cycles # 2.776 GHz + 8,314,531,864 instructions # 2.18 insn per cycle + 1.374237525 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3369) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015847e+00 Avg ME (F77/C++) = 2.0158474864438176 Relative difference = 2.4130988992271984e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 8.968780e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.027488e+06 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.027488e+06 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 8.645853e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.897799e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 9.897799e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 1.252562 sec +TOTAL : 1.316285 sec INFO: No Floating Point Exceptions have been reported - 3,553,623,001 cycles # 2.827 GHz - 7,920,395,453 instructions # 2.23 insn per cycle - 1.257589697 seconds time elapsed + 3,675,425,998 cycles # 2.778 GHz + 7,974,219,247 instructions # 2.17 insn per cycle + 1.323972787 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3213) (512y: 20) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015847e+00 Avg ME (F77/C++) = 2.0158474864438176 Relative difference = 2.4130988992271984e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 6.786175e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 7.483095e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 7.483095e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.513578e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 7.178216e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 7.178216e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 1.630134 sec +TOTAL : 1.714470 sec INFO: No Floating Point Exceptions have been reported - 3,247,631,595 cycles # 1.990 GHz - 6,100,927,102 instructions # 1.88 insn per cycle - 1.635147155 seconds time elapsed + 3,315,579,741 cycles # 1.925 GHz + 6,150,343,295 instructions # 1.85 insn per cycle + 1.722857238 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2256) (512y: 24) (512z: 2156) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015848e+00 Avg ME (F77/C++) = 2.0158476348733529 Relative difference = 1.8112806478434436e-07 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt index 958f808733..60f2dad34a 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_f_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-01_23:59:56 +DATE: 2024-09-15_12:16:21 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CUD:FLT+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 9.035830e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.802180e+08 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.914241e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 7.969923e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.737348e+08 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.863422e+08 ) sec^-1 MeanMatrixElemValue = ( 2.072877e+00 +- 3.361153e-03 ) GeV^0 -TOTAL : 0.483129 sec +TOTAL : 0.488943 sec INFO: No Floating Point Exceptions have been reported - 2,054,954,765 cycles # 2.911 GHz - 2,972,387,315 instructions # 1.45 insn per cycle - 0.764795813 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 + 2,058,928,554 
cycles # 2.875 GHz + 2,909,617,560 instructions # 1.41 insn per cycle + 0.773317754 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 124 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_f_inl0_hrd1/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.015841e+00 Avg ME (F77/GPU) = 2.0158787037944421 Relative difference = 1.870375413642407e-05 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_f_inl0_hrd1/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.906781e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.962993e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.962993e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.962212e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 2.019252e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 2.019252e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072937e+00 +- 3.361545e-03 ) GeV^0 -TOTAL : 
5.586697 sec +TOTAL : 5.446348 sec INFO: No Floating Point Exceptions have been reported - 15,967,526,620 cycles # 2.856 GHz - 44,427,051,711 instructions # 2.78 insn per cycle - 5.591572721 seconds time elapsed + 16,014,544,982 cycles # 2.937 GHz + 44,474,347,041 instructions # 2.78 insn per cycle + 5.454124254 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 533) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015849e+00 Avg ME (F77/C++) = 2.0158491701586172 Relative difference = 8.441039850630506e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.113742e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.566435e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.566435e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.286669e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.759924e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.759924e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072937e+00 +- 3.361544e-03 ) GeV^0 -TOTAL : 2.138442 sec +TOTAL : 2.086643 sec INFO: No Floating Point Exceptions have been reported - 6,100,101,446 cycles # 2.848 GHz - 17,077,931,441 instructions # 2.80 insn per cycle - 2.143473002 seconds time elapsed + 6,135,728,749 cycles # 2.931 GHz + 17,120,648,230 instructions # 2.79 insn per cycle + 2.094524948 seconds time elapsed =Symbols in 
CPPProcess_cpp.o= (~sse4: 2863) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015849e+00 Avg ME (F77/C++) = 2.0158486895961687 Relative difference = 1.539816876576819e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.782057e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.310209e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.310209e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.052770e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.643067e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.643067e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 1.902526 sec +TOTAL : 1.835441 sec INFO: No Floating Point Exceptions have been reported - 5,024,219,960 cycles # 2.635 GHz - 10,224,646,422 instructions # 2.04 insn per cycle - 1.908281978 seconds time elapsed + 5,101,873,696 cycles # 2.769 GHz + 10,273,156,361 instructions # 2.01 insn per cycle + 1.843297684 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3906) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015847e+00 Avg ME (F77/C++) = 2.0158474864438176 Relative difference = 2.4130988992271984e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.883713e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 6.430713e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 6.430713e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 6.133897e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 6.741009e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 6.741009e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 1.867850 sec +TOTAL : 1.812216 sec INFO: No Floating Point Exceptions have been reported - 4,959,367,813 cycles # 2.649 GHz - 9,995,718,795 instructions # 2.02 insn per cycle - 1.872847397 seconds time elapsed + 5,041,846,676 cycles # 2.771 GHz + 10,042,915,318 instructions # 1.99 insn per cycle + 1.820042823 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 3805) (512y: 2) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015847e+00 Avg ME (F77/C++) = 2.0158474864438176 Relative difference = 2.4130988992271984e-07 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1] Workflow summary = CPP:FLT+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = FLOAT (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[16] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.458764e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 4.763718e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 4.763718e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.642094e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 4.969408e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 4.969408e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072967e+00 +- 3.361967e-03 ) GeV^0 -TOTAL : 2.437626 sec +TOTAL : 2.363033 sec INFO: No Floating Point Exceptions have been reported - 4,360,017,003 cycles # 1.787 GHz - 8,444,019,774 instructions # 1.94 insn per cycle - 2.442485988 seconds time elapsed + 4,430,997,247 cycles # 1.870 GHz + 8,493,309,798 instructions # 1.92 insn per cycle + 2.370917653 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2744) (512y: 4) (512z: 2754) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_f_inl0_hrd1/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015848e+00 Avg ME (F77/C++) = 2.0158476348733529 Relative difference = 1.8112806478434436e-07 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt index 184bc580e3..62fab95ac2 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd0.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppavx2 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512y -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cpp512z -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' -DATE: 2024-09-02_00:00:18 +DATE: 2024-09-15_12:16:43 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]: ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) -EvtsPerSec[Rmb+ME] (23) = ( 4.681342e+07 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 9.493674e+07 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.010313e+08 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 4.370448e+07 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 9.371457e+07 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.004604e+08 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 0.527962 sec +TOTAL : 0.536026 sec INFO: No Floating Point Exceptions have been reported - 2,142,172,279 cycles # 2.815 GHz - 3,110,651,634 instructions # 1.45 insn per cycle - 0.817661924 seconds time elapsed -runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 + 2,214,409,122 
cycles # 2.867 GHz + 3,163,292,335 instructions # 1.43 insn per cycle + 0.830149622 seconds time elapsed +runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe -p 2048 256 1 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 214 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100% ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/runTest_cuda.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/check_cuda.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd0/fcheck_cuda.exe 2 64 2 Avg ME (C++/GPU) = 2.015836e+00 Avg ME (F77/GPU) = 2.0158358639104246 Relative difference = 6.751024171044779e-08 OK (relative difference <= 5E-3) ========================================================================= -Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe +Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd0/check_hip.exe ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD) -EvtsPerSec[Rmb+ME] (23) = ( 1.743073e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 1.787260e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 1.787260e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 1.787519e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 1.833765e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 1.833765e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 
6.126680 sec +TOTAL : 6.014473 sec INFO: No Floating Point Exceptions have been reported - 17,463,177,818 cycles # 2.850 GHz - 46,087,857,513 instructions # 2.64 insn per cycle - 6.131880330 seconds time elapsed + 17,675,190,561 cycles # 2.934 GHz + 46,198,484,525 instructions # 2.61 insn per cycle + 6.025789457 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 622) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359218686011 Relative difference = 3.8758807327712803e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.106944e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.265946e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.265946e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.209372e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.373008e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.373008e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.486520 sec +TOTAL : 3.414727 sec INFO: No Floating Point Exceptions have been reported - 9,905,155,322 cycles # 2.838 GHz - 27,598,829,825 instructions # 2.79 insn per cycle - 3.491810860 seconds time elapsed + 10,062,586,014 cycles # 2.937 GHz + 27,715,049,037 instructions # 2.75 insn per cycle + 3.427097999 seconds time elapsed =Symbols in 
CPPProcess_cpp.o= (~sse4: 2582) (avx2: 0) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359218686011 Relative difference = 3.8758807327712803e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 4.849477e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.217209e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.217209e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.030622e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.429639e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.429639e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.271105 sec +TOTAL : 2.231035 sec INFO: No Floating Point Exceptions have been reported - 6,003,648,111 cycles # 2.639 GHz - 12,489,197,646 instructions # 2.08 insn per cycle - 2.276156194 seconds time elapsed + 6,157,448,048 cycles # 2.747 GHz + 12,606,647,104 instructions # 2.05 insn per cycle + 2.242669652 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2777) (512y: 0) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe +runTest 
/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359178371690 Relative difference = 4.0758688308634e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 5.363313e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 5.814874e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 5.814874e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 5.510413e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 5.987462e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 5.987462e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 2.063360 sec +TOTAL : 2.049793 sec INFO: No Floating Point Exceptions have been reported - 5,497,285,854 cycles # 2.660 GHz - 11,926,850,128 instructions # 2.17 insn per cycle - 2.068523111 seconds time elapsed + 5,651,790,254 cycles # 2.742 GHz + 12,043,922,780 instructions # 2.13 insn per cycle + 2.061986198 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2522) (512y: 146) (512z: 0) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359178371690 Relative difference = 4.0758688308634e-08 OK (relative difference <= 5E-3) ========================================================================= -runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= +runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe -p 2048 256 2 OMP= INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=0] Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK FP precision = MIXED (NaN/abnormal=0, zero=0) Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES] -EvtsPerSec[Rmb+ME] (23) = ( 3.420824e+05 ) sec^-1 -EvtsPerSec[MatrixElems] (3) = ( 3.598692e+05 ) sec^-1 -EvtsPerSec[MECalcOnly] (3a) = ( 3.598692e+05 ) sec^-1 +EvtsPerSec[Rmb+ME] (23) = ( 3.559881e+05 ) sec^-1 +EvtsPerSec[MatrixElems] (3) = ( 3.752268e+05 ) sec^-1 +EvtsPerSec[MECalcOnly] (3a) = ( 3.752268e+05 ) sec^-1 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0 -TOTAL : 3.174262 sec +TOTAL : 3.093339 sec INFO: No Floating Point Exceptions have been reported - 5,615,505,273 cycles # 1.767 GHz - 8,115,440,716 instructions # 1.45 insn per cycle - 3.179484468 seconds time elapsed + 5,777,925,002 cycles # 1.861 GHz + 8,230,989,757 instructions # 1.42 insn per cycle + 3.105063126 seconds time elapsed =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1671) (512y: 126) (512z: 1866) ------------------------------------------------------------------------- -runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe +runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/runTest_cpp.exe INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW [ PASSED ] 4 tests. 
DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 } @@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 } INFO: No Floating Point Exceptions have been reported ------------------------------------------------------------------------- -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 -cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/check_cpp.exe --common -p 2 64 2 +cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd0/fcheck_cpp.exe 2 64 2 Avg ME (C++/C++) = 2.015836e+00 Avg ME (F77/C++) = 2.0158359178371690 Relative difference = 4.0758688308634e-08 diff --git a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt index 916e38c1bf..bde416a886 100644 --- a/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt +++ b/epochX/cudacpp/tput/logs_susyggtt_mad/log_susyggtt_mad_m_inl0_hrd1.txt @@ -1,5 +1,5 @@ -Building in /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx +Building in /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx BACKEND=cpp512y (was cppauto) OMPFLAGS= FPTYPE='d' @@ -11,58 +11,58 @@ Building in BUILDDIR=build.auto_d_inl0_hrd0 for tag=512y_d_inl0_hrd0_hasCurand_h make: Nothing to be done for 'gtestlibs'. make USEBUILDDIR=1 BACKEND=cuda -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppnone -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. -make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make USEBUILDDIR=1 BACKEND=cppsse4 -make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' +make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx' make[1]: Nothing to be done for 'all'. 
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 make USEBUILDDIR=1 BACKEND=cppavx2
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 make USEBUILDDIR=1 BACKEND=cpp512y
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 make USEBUILDDIR=1 BACKEND=cpp512z
-make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
+make[1]: Entering directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
 make[1]: Nothing to be done for 'all'.
-make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
+make[1]: Leaving directory '/data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx'
-DATE: 2024-09-02_00:00:44
+DATE: 2024-09-15_12:17:08
 On itscrd90.cern.ch [CPU: Intel(R) Xeon(R) Silver 4216 CPU] [GPU: 1x Tesla V100S-PCIE-32GB]:
 =========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 2 OMP=
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
 Process = SIGMA_MSSM_SLHA2_GG_TTX_CUDA [nvcc 12.0.140 (gcc 11.3.1)] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary = CUD:MIX+THX:CURDEV+RMBDEV+MESDEV/none+NAVBRK
 FP precision = MIXED (NaN/abnormal=0, zero=0)
-EvtsPerSec[Rmb+ME] (23) = ( 4.631870e+07 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 9.275543e+07 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 9.856692e+07 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 4.300920e+07 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 9.324653e+07 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 9.961570e+07 ) sec^-1
 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0
-TOTAL : 0.526521 sec
+TOTAL : 0.533607 sec
 INFO: No Floating Point Exceptions have been reported
- 2,129,961,314 cycles # 2.806 GHz
- 3,087,664,389 instructions # 1.45 insn per cycle
- 0.815795024 seconds time elapsed
-runNcu /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1
+ 2,198,649,887 cycles # 2.849 GHz
+ 3,081,934,025 instructions # 1.40 insn per cycle
+ 0.827713906 seconds time elapsed
+runNcu /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe -p 2048 256 1
 ==PROF== Profiling "sigmaKin": launch__registers_per_thread 212
 ==PROF== Profiling "sigmaKin": sm__sass_average_branch_targets_threads_uniform.pct 100%
 -------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/runTest_cuda.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
 [ PASSED ] 4 tests.
 DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
@@ -70,33 +70,33 @@ INFO: No Floating Point Exceptions have been reported
 DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/check_cuda.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.cuda_m_inl0_hrd1/fcheck_cuda.exe 2 64 2
 Avg ME (C++/GPU) = 2.015836e+00
 Avg ME (F77/GPU) = 2.0158358639104246
 Relative difference = 6.751024171044779e-08
 OK (relative difference <= 5E-3)
 =========================================================================
-Not found: /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe
+Not found: /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.hip_m_inl0_hrd1/check_hip.exe
 =========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP=
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
 Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/none+NAVBRK
 FP precision = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv = SCALAR ('none': ~vector[1], no SIMD)
-EvtsPerSec[Rmb+ME] (23) = ( 1.803541e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 1.850201e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 1.850201e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 1.850472e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 1.899391e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 1.899391e+05 ) sec^-1
 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0
-TOTAL : 5.922063 sec
+TOTAL : 5.814253 sec
 INFO: No Floating Point Exceptions have been reported
- 16,951,887,322 cycles # 2.861 GHz
- 45,094,274,315 instructions # 2.66 insn per cycle
- 5.927079923 seconds time elapsed
+ 17,118,477,102 cycles # 2.939 GHz
+ 45,207,445,046 instructions # 2.64 insn per cycle
+ 5.826249043 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 567) (avx2: 0) (512y: 0) (512z: 0)
 -------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
 [ PASSED ] 4 tests.
 DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
@@ -104,31 +104,31 @@ INFO: No Floating Point Exceptions have been reported
 DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.none_m_inl0_hrd1/fcheck_cpp.exe 2 64 2
 Avg ME (C++/C++) = 2.015836e+00
 Avg ME (F77/C++) = 2.0158359218686011
 Relative difference = 3.8758807327712803e-08
 OK (relative difference <= 5E-3)
 =========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP=
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
 Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/sse4+CXVBRK
 FP precision = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv = VECTOR[2] ('sse4': SSE4.2, 128bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 3.271198e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.440809e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.440809e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 3.320488e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.495447e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.495447e+05 ) sec^-1
 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0
-TOTAL : 3.314709 sec
+TOTAL : 3.305181 sec
 INFO: No Floating Point Exceptions have been reported
- 9,491,443,814 cycles # 2.860 GHz
- 26,243,392,606 instructions # 2.76 insn per cycle
- 3.319931879 seconds time elapsed
+ 9,752,287,704 cycles # 2.941 GHz
+ 26,369,462,343 instructions # 2.70 insn per cycle
+ 3.316567159 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 2386) (avx2: 0) (512y: 0) (512z: 0)
 -------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
 [ PASSED ] 4 tests.
 DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
@@ -136,31 +136,31 @@ INFO: No Floating Point Exceptions have been reported
 DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.sse4_m_inl0_hrd1/fcheck_cpp.exe 2 64 2
 Avg ME (C++/C++) = 2.015836e+00
 Avg ME (F77/C++) = 2.0158359218686011
 Relative difference = 3.8758807327712803e-08
 OK (relative difference <= 5E-3)
 =========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP=
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
 Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/avx2+CXVBRK
 FP precision = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv = VECTOR[4] ('avx2': AVX2, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 4.278974e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.566310e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.566310e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 4.466984e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 4.783441e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 4.783441e+05 ) sec^-1
 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0
-TOTAL : 2.558625 sec
+TOTAL : 2.496548 sec
 INFO: No Floating Point Exceptions have been reported
- 6,762,791,528 cycles # 2.640 GHz
- 14,038,966,937 instructions # 2.08 insn per cycle
- 2.563817269 seconds time elapsed
+ 6,902,736,140 cycles # 2.753 GHz
+ 14,146,955,352 instructions # 2.05 insn per cycle
+ 2.508688639 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2895) (512y: 0) (512z: 0)
 -------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
 [ PASSED ] 4 tests.
 DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
@@ -168,31 +168,31 @@ INFO: No Floating Point Exceptions have been reported
 DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.avx2_m_inl0_hrd1/fcheck_cpp.exe 2 64 2
 Avg ME (C++/C++) = 2.015836e+00
 Avg ME (F77/C++) = 2.0158359178371690
 Relative difference = 4.0758688308634e-08
 OK (relative difference <= 5E-3)
 =========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP=
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
 Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512y+CXVBRK
 FP precision = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv = VECTOR[4] ('512y': AVX512, 256bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 4.605515e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 4.931896e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 4.931896e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 4.747061e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 5.095585e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 5.095585e+05 ) sec^-1
 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0
-TOTAL : 2.383269 sec
+TOTAL : 2.355880 sec
 INFO: No Floating Point Exceptions have been reported
- 6,370,579,818 cycles # 2.668 GHz
- 13,515,010,631 instructions # 2.12 insn per cycle
- 2.388311155 seconds time elapsed
+ 6,536,932,805 cycles # 2.762 GHz
+ 13,633,905,312 instructions # 2.09 insn per cycle
+ 2.367915662 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 2531) (512y: 302) (512z: 0)
 -------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
 [ PASSED ] 4 tests.
 DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
@@ -200,31 +200,31 @@ INFO: No Floating Point Exceptions have been reported
 DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512y_m_inl0_hrd1/fcheck_cpp.exe 2 64 2
 Avg ME (C++/C++) = 2.015836e+00
 Avg ME (F77/C++) = 2.0158359178371690
 Relative difference = 4.0758688308634e-08
 OK (relative difference <= 5E-3)
 =========================================================================
-runExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP=
+runExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe -p 2048 256 2 OMP=
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
 Process = SIGMA_MSSM_SLHA2_GG_TTX_CPP [gcc 11.3.1] [inlineHel=0] [hardcodePARAM=1]
 Workflow summary = CPP:MIX+CXS:CURHST+RMBHST+MESHST/512z+CXVBRK
 FP precision = MIXED (NaN/abnormal=0, zero=0)
 Internal loops fptype_sv = VECTOR[8] ('512z': AVX512, 512bit) [cxtype_ref=YES]
-EvtsPerSec[Rmb+ME] (23) = ( 3.438799e+05 ) sec^-1
-EvtsPerSec[MatrixElems] (3) = ( 3.617303e+05 ) sec^-1
-EvtsPerSec[MECalcOnly] (3a) = ( 3.617303e+05 ) sec^-1
+EvtsPerSec[Rmb+ME] (23) = ( 3.589352e+05 ) sec^-1
+EvtsPerSec[MatrixElems] (3) = ( 3.785622e+05 ) sec^-1
+EvtsPerSec[MECalcOnly] (3a) = ( 3.785622e+05 ) sec^-1
 MeanMatrixElemValue = ( 2.072848e+00 +- 3.360985e-03 ) GeV^0
-TOTAL : 3.157631 sec
+TOTAL : 3.069165 sec
 INFO: No Floating Point Exceptions have been reported
- 5,593,786,532 cycles # 1.770 GHz
- 9,209,286,956 instructions # 1.65 insn per cycle
- 3.162934960 seconds time elapsed
+ 5,741,871,289 cycles # 1.864 GHz
+ 9,325,593,834 instructions # 1.62 insn per cycle
+ 3.081760977 seconds time elapsed
 =Symbols in CPPProcess_cpp.o= (~sse4: 0) (avx2: 1456) (512y: 212) (512z: 2059)
 -------------------------------------------------------------------------
-runTest /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe
+runTest /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/runTest_cpp.exe
 INFO: The following Floating Point Exceptions will cause SIGFPE program aborts: FE_DIVBYZERO, FE_INVALID, FE_OVERFLOW
 [ PASSED ] 4 tests.
 DEBUG: MEK (no multichannel) processed 512 events across 3 channels { no-multichannel : 512 }
@@ -232,8 +232,8 @@ INFO: No Floating Point Exceptions have been reported
 DEBUG: MEK (channelid array) processed 512 events across 3 channels { 1 : 192, 2 : 160, 3 : 160 }
 INFO: No Floating Point Exceptions have been reported
 -------------------------------------------------------------------------
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
-cmpExe /data/avalassi/GPU2023/madgraph4gpuX/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/check_cpp.exe --common -p 2 64 2
+cmpExe /data/avalassi/GPU2023/madgraph4gpuBis/epochX/cudacpp/susy_gg_tt.mad/SubProcesses/P1_gg_ttx/build.512z_m_inl0_hrd1/fcheck_cpp.exe 2 64 2
 Avg ME (C++/C++) = 2.015836e+00
 Avg ME (F77/C++) = 2.0158359178371690
 Relative difference = 4.0758688308634e-08